blob: 1bdf22079e6ed3561e431eb073852e9e66d2867d [file] [log] [blame]
XNNPACK Teamb455b122019-09-27 18:10:33 -07001// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2019 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8
9#include <stdbool.h>
10#include <stddef.h>
11#include <stdint.h>
12
13#include <pthread.h>
14
Marat Dukhand343c222019-10-07 09:22:14 -070015#ifndef __EMSCRIPTEN__
16 #include <cpuinfo.h>
17#endif
XNNPACK Teamb455b122019-09-27 18:10:33 -070018
19#include <xnnpack.h>
20#include <xnnpack/argmaxpool.h>
21#include <xnnpack/avgpool.h>
Marat Dukhan69722492019-11-11 19:55:50 -080022#include <xnnpack/bilinear.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070023#include <xnnpack/clamp.h>
Marat Dukhan1dadbf72019-10-01 10:46:20 -070024#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070025#include <xnnpack/conv.h>
26#include <xnnpack/dwconv.h>
27#include <xnnpack/gavgpool.h>
28#include <xnnpack/gemm.h>
29#include <xnnpack/hswish.h>
30#include <xnnpack/igemm.h>
31#include <xnnpack/log.h>
32#include <xnnpack/lut.h>
33#include <xnnpack/maxpool.h>
34#include <xnnpack/pad.h>
35#include <xnnpack/params.h>
36#include <xnnpack/pavgpool.h>
37#include <xnnpack/prelu.h>
38#include <xnnpack/rmax.h>
39#include <xnnpack/spmm.h>
40#include <xnnpack/unpool.h>
41#include <xnnpack/vadd.h>
Marat Dukhanc07cb7f2019-11-14 15:32:05 -080042#include <xnnpack/vbinop.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070043#include <xnnpack/vmulcaddc.h>
Marat Dukhan346a9e52019-11-15 09:06:30 -080044#include <xnnpack/vunop.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070045#include <xnnpack/zip.h>
46
// Default XNN_ENABLE_ASSEMBLY to 1 unless the build has already defined it.
// Further down, init() tests this macro with #if to choose between
// micro-kernels whose names carry an aarch32/aarch64 tag and the plain
// NEON-named alternatives, so defining it to 0 selects the latter.
47#ifndef XNN_ENABLE_ASSEMBLY
48 #define XNN_ENABLE_ASSEMBLY 1
49#endif
50
// Once-control object, statically initialized with PTHREAD_ONCE_INIT.
// NOTE(review): presumably handed to pthread_once() together with init() so
// that initialization runs a single time; the call site is not in this part
// of the file - confirm there.
51static pthread_once_t init_guard = PTHREAD_ONCE_INIT;
52
// Global micro-kernel parameter table. It starts with .initialized = false;
// members not named in the initializer are zero-initialized by the language
// rules for static objects. init() below assigns the per-datatype entries
// (q8, u8, x8, f32, x32) for the architecture selected at compile time.
53struct xnn_parameters xnn_params = {
54 .initialized = false
55};
56
// External stub declarations that are only visible on PNaCl / asm.js /
// WebAssembly builds. Both take and return uint32_t values.
// NOTE(review): the names suggest f32 subtract / min performed on raw 32-bit
// patterns; the definitions live outside this file - confirm before relying
// on that.
//
// xnn_stub_wasm_f32_sub: declared for PNaCl, asm.js, WAsm and WAsm SIMD.
Marat Dukhan1dadbf72019-10-01 10:46:20 -070057#if XNN_ARCH_PNACL || XNN_ARCH_ASMJS || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
XNNPACK Teamb455b122019-09-27 18:10:33 -070058 extern uint32_t xnn_stub_wasm_f32_sub(uint32_t a, uint32_t b);
59#endif
// xnn_stub_wasm_f32_min: same targets except asm.js, which is not listed in
// this guard.
Marat Dukhan1dadbf72019-10-01 10:46:20 -070060#if XNN_ARCH_PNACL || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
XNNPACK Teamb455b122019-09-27 18:10:33 -070061 extern uint32_t xnn_stub_wasm_f32_min(uint32_t a, uint32_t b);
62#endif
63
64static void init(void) {
Marat Dukhan1dadbf72019-10-01 10:46:20 -070065#if XNN_ARCH_ARM
XNNPACK Teamb455b122019-09-27 18:10:33 -070066 if (!cpuinfo_has_arm_neon()) {
67 xnn_log_error("XNNPACK initialization failed: NEON is not supported");
68 return;
69 }
70
71 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -070072 #ifndef XNN_NO_Q8_OPERATORS
73 xnn_params.q8.gemm = (struct gemm_parameters) {
74 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x8__neon,
75 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x8__neon,
76 .mr = 4,
77 .nr = 8,
78 };
XNNPACK Teamb455b122019-09-27 18:10:33 -070079
Marat Dukhan8fe54e42019-10-10 14:12:59 -070080 #if XNN_ENABLE_ASSEMBLY
81 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
82 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__aarch32_neon,
83 .cr = 8,
84 .mr = 9,
85 };
86 #else
87 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
88 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
89 .cr = 8,
90 .mr = 9,
91 };
92 #endif
93 xnn_params.q8.avgpool = (struct avgpool_parameters) {
94 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
95 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
96 .mr = 9,
97 .qr = 8,
98 };
99 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
100 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
101 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
102 .mr = 7,
103 };
104 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
105 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700106
107 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700108 #ifndef XNN_NO_U8_OPERATORS
109 xnn_params.u8.maxpool = (struct maxpool_parameters) {
110 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
111 .mr = 9,
112 .qr = 8,
113 };
114 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
115 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
116 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
117 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700118
119 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700120 #ifndef XNN_NO_X8_OPERATORS
121 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
122 xnn_params.x8.zip = (struct zip_parameters) {
123 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
124 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
125 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
126 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
127 };
128 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700129
130 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700131 #ifndef XNN_NO_F32_OPERATORS
132 xnn_params.f32.gemm = (struct gemm_parameters) {
133 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neon_ld128,
134 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neon_ld128,
135 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_ld64,
136 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_ld64,
137 .mr = 4,
138 .nr = 8,
139 };
140 xnn_params.f32.gemm2 = (struct gemm_parameters) {
141 .gemm = NULL,
142 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neon_ld64,
143 .mr = 4,
144 .nr = 2,
145 };
146 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
147 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
148 .cr = 4,
149 .mr = 4,
150 };
151 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
152 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neon,
153 .cr = 4,
154 .mr = 9,
155 };
156 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
157 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
158 .cr = 4,
159 .mr = 25,
160 };
161 xnn_params.f32.avgpool = (struct avgpool_parameters) {
162 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
163 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
164 .mr = 9,
165 .qr = 8,
166 };
167 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
168 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
169 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
170 .mr = 9,
171 .qr = 8,
172 };
173 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
174 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
175 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
176 .mr = 7,
177 };
178 xnn_params.f32.maxpool = (struct maxpool_parameters) {
179 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
180 .mr = 9,
181 .qr = 8,
182 };
183 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
184 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
185 .mr = 4,
186 };
187 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
188 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
189 .mr = 9,
190 };
191 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
192 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
193 .mr = 9,
194 .qr = 8,
195 };
Marat Dukhan69722492019-11-11 19:55:50 -0800196 xnn_params.f32.bilinear = (struct bilinear_parameters) {
197 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__neon_c8,
198 .pixel_tile = 1,
199 .channel_tile = 8,
200 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700201 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
202 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neon;
203 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800204 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__neon_2x8,
205 .row_tile = 2,
206 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700207 };
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800208 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__neon_x8;
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700209 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -0800210 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neon_2x,
211 .channel_tile = 4,
212 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700213 };
214 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700215
216 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700217 #ifndef XNN_NO_X32_OPERATORS
218 xnn_params.x32.pad = (struct pad_parameters) {
219 .ukernel = xnn_x32_pad_x2__neon,
220 .mr = 2,
221 };
222 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
223 xnn_params.x32.zip = (struct zip_parameters) {
224 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
225 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
226 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
227 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
228 };
229 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700230
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700231#elif XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700232
233 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700234 #ifndef XNN_NO_Q8_OPERATORS
235 xnn_params.q8.gemm = (struct gemm_parameters) {
236 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_8x8__neon,
237 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_8x8__neon,
238 .mr = 8,
239 .nr = 8,
240 };
241 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
242 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
243 .cr = 8,
244 .mr = 9,
245 };
246 xnn_params.q8.avgpool = (struct avgpool_parameters) {
247 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
248 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
249 .mr = 9,
250 .qr = 8,
251 };
252 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
253 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
254 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
255 .mr = 7,
256 };
257 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
258 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700259
260 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700261 #ifndef XNN_NO_U8_OPERATORS
262 xnn_params.u8.maxpool = (struct maxpool_parameters) {
263 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
264 .mr = 9,
265 .qr = 8,
266 };
267 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
268 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
269 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
270 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700271
272 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700273 #ifndef XNN_NO_X8_OPERATORS
274 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
275 xnn_params.x8.zip = (struct zip_parameters) {
276 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
277 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
278 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
279 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
280 };
281 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700282
283 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700284 #ifndef XNN_NO_F32_OPERATORS
285 #if XNN_ENABLE_ASSEMBLY
286 switch (cpuinfo_get_core(0)->uarch) {
287 case cpuinfo_uarch_kryo:
288 xnn_params.f32.gemm = (struct gemm_parameters) {
289 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a57,
290 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
291 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
292 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
293 .mr = 4,
294 .nr = 8,
295 };
296 break;
297 case cpuinfo_uarch_cortex_a57:
298 xnn_params.f32.gemm = (struct gemm_parameters) {
299 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
300 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
301 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
302 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
303 .mr = 6,
304 .nr = 8,
305 };
306 break;
307 case cpuinfo_uarch_cortex_a72:
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700308 xnn_params.f32.gemm = (struct gemm_parameters) {
309 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
310 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
311 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
312 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
313 .mr = 4,
314 .nr = 8,
315 };
316 break;
317 case cpuinfo_uarch_cortex_a75:
Frank Barchard263bb092019-10-28 15:28:46 -0700318 case cpuinfo_uarch_cortex_a76:
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700319 case cpuinfo_uarch_mongoose_m1:
320 case cpuinfo_uarch_mongoose_m2:
321 case cpuinfo_uarch_meerkat_m3:
322 case (cpuinfo_uarch_meerkat_m3 + 1):
323 xnn_params.f32.gemm = (struct gemm_parameters) {
324 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
325 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
326 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
327 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
328 .mr = 6,
329 .nr = 8,
330 };
331 break;
332 case cpuinfo_uarch_cortex_a53:
333 case cpuinfo_uarch_cortex_a55:
334 xnn_params.f32.gemm = (struct gemm_parameters) {
Frank Barchardbd1d5d92019-10-30 15:53:30 -0700335 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a53,
336 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a53,
337 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a53,
338 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a53,
339 .mr = 6,
340 .nr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700341 };
342 break;
343 case cpuinfo_uarch_cortex_a73:
344 xnn_params.f32.gemm = (struct gemm_parameters) {
345 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
346 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
347 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
348 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
349 .mr = 6,
350 .nr = 8,
351 };
352 break;
353 default:
354 xnn_params.f32.gemm = (struct gemm_parameters) {
Frank Barchard2af471b2019-10-16 19:10:32 -0700355 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__neonfma_ld64,
356 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__neonfma_ld64,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700357 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
358 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
Frank Barchard2af471b2019-10-16 19:10:32 -0700359 .mr = 6,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700360 .nr = 8,
361 };
362 break;
363 }
364 #else // XNN_ENABLE_ASSEMBLY
XNNPACK Teamb455b122019-09-27 18:10:33 -0700365 xnn_params.f32.gemm = (struct gemm_parameters) {
Frank Barchard2af471b2019-10-16 19:10:32 -0700366 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__neonfma_ld64,
367 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__neonfma_ld64,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700368 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neonfma_ld64,
369 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neonfma_ld64,
Frank Barchard2af471b2019-10-16 19:10:32 -0700370 .mr = 6,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700371 .nr = 8,
372 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700373 #endif
XNNPACK Teamb455b122019-09-27 18:10:33 -0700374
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700375 xnn_params.f32.gemm2 = (struct gemm_parameters) {
376 .gemm = NULL,
377 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neonfma_ld64,
378 .mr = 4,
379 .nr = 2,
380 };
381 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
382 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
383 .cr = 4,
384 .mr = 4,
385 };
386 switch (cpuinfo_get_core(0)->uarch) {
387 case cpuinfo_uarch_kryo:
388 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
389 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neonfma,
390 .cr = 4,
391 .mr = 9,
392 };
393 break;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700394#if XNN_ENABLE_ASSEMBLY
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700395 case cpuinfo_uarch_cortex_a53:
396 case cpuinfo_uarch_cortex_a55:
397 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
398 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55,
399 .cr = 4,
400 .mr = 9,
401 };
402 break;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700403#endif
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700404 default:
405 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
406 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__neonfma,
407 .cr = 8,
408 .mr = 9,
409 };
410 break;
411 }
412 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
413 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
414 .cr = 4,
415 .mr = 25,
416 };
417 xnn_params.f32.avgpool = (struct avgpool_parameters) {
418 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
419 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
420 .mr = 9,
421 .qr = 8,
422 };
423 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
424 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
425 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
426 .mr = 9,
427 .qr = 8,
428 };
429 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
430 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
431 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
432 .mr = 7,
433 };
434 xnn_params.f32.maxpool = (struct maxpool_parameters) {
435 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
436 .mr = 9,
437 .qr = 8,
438 };
439 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
440 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
441 .mr = 4,
442 };
443 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
444 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
445 .mr = 9,
446 };
447 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
448 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
449 .mr = 9,
450 .qr = 8,
451 };
Marat Dukhan69722492019-11-11 19:55:50 -0800452 xnn_params.f32.bilinear = (struct bilinear_parameters) {
453 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__neonfma_c8,
454 .pixel_tile = 1,
455 .channel_tile = 8,
456 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700457 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
458 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neonfma;
Marat Dukhan346a9e52019-11-15 09:06:30 -0800459 xnn_params.f32.sigmoid = (xnn_univector_ukernel_function) xnn_f32_sigmoid_ukernel__neonfma_p5_x16;
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700460 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800461 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__neon_2x8,
462 .row_tile = 2,
463 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700464 };
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800465 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__neon_x8;
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700466 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -0800467 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neonfma_2x,
468 .channel_tile = 4,
469 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700470 };
471 #ifndef XNN_NO_SPNCHW_OPERATORS
472 xnn_params.f32.spmm = (struct spmm_parameters) {
Erich Elsen9cdade32019-10-16 05:26:59 -0700473 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x1__neonfma_pipelined,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700474 .mr = 16,
475 .nr = 1,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700476 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700477 xnn_params.f32.spmm2 = (struct spmm_parameters) {
478 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x2__neonfma,
479 .mr = 16,
480 .nr = 2,
481 };
482 xnn_params.f32.spmm4 = (struct spmm_parameters) {
483 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x4__neonfma,
484 .mr = 16,
485 .nr = 4,
486 };
487 xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
488 .ukernel_with_symm_padding =
489 (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2,
490 .output_channel_tile = 4,
491 .output_height_tile = 2,
492 .output_width_tile = 2,
493 };
494 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
495 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma,
496 .input_width_tile = 4,
497 .output_width_tile = 4,
498 .output_height_tile = 3,
499 };
500 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
501 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma,
502 .input_width_tile = 4,
503 .output_width_tile = 4,
504 .output_height_tile = 1,
505 };
506 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
507 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__neon_x4,
508 .channel_tile = 4,
509 };
510 #endif // XNN_NO_SPNCHW_OPERATORS
511 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700512
513 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700514 #ifndef XNN_NO_X32_OPERATORS
515 xnn_params.x32.pad = (struct pad_parameters) {
516 .ukernel = xnn_x32_pad_x2__neon,
517 .mr = 2,
518 };
519 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
520 xnn_params.x32.zip = (struct zip_parameters) {
521 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
522 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
523 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
524 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
525 };
526 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700527
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700528#elif XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700529 if (!cpuinfo_has_x86_sse2()) {
530 xnn_log_error("XNNPACK initialization failed: SSE2 is not supported");
531 return;
532 }
533
534 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700535 #ifndef XNN_NO_Q8_OPERATORS
536 xnn_params.q8.gemm = (struct gemm_parameters) {
537 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x4c2__sse2,
538 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x4c2__sse2,
539 .mr = 4,
540 .nr = 4,
541 .log2_kr = 1,
542 };
543 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
544 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__sse2,
545 .cr = 8,
546 .mr = 9,
547 };
548 xnn_params.q8.avgpool = (struct avgpool_parameters) {
549 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__sse2,
550 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__sse2,
551 .mr = 9,
552 .qr = 8,
553 };
554 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
555 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__sse2,
556 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__sse2,
557 .mr = 7,
558 };
559 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__sse2;
560 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700561
562 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700563 #ifndef XNN_NO_U8_OPERATORS
564 xnn_params.u8.maxpool = (struct maxpool_parameters) {
565 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__sse2,
566 .mr = 9,
567 .qr = 8,
568 };
569 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__sse2;
570 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
571 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__sse2;
572 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700573
574 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700575 #ifndef XNN_NO_X8_OPERATORS
576 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
577 xnn_params.x8.zip = (struct zip_parameters) {
578 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__sse2,
579 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__sse2,
580 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__sse2,
581 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__sse2,
582 };
583 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700584
585 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700586 #ifndef XNN_NO_F32_OPERATORS
587 xnn_params.f32.gemm = (struct gemm_parameters) {
588 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__sse_load1,
589 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__sse_load1,
590 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__sse_load1,
591 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__sse_load1,
592 .mr = 4,
593 .nr = 8,
594 };
595 xnn_params.f32.gemm2 = (struct gemm_parameters) {
596 .gemm = NULL,
597 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__sse,
598 .mr = 4,
599 .nr = 2,
600 .log2_kr = 2,
601 };
602 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800603 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x4__sse,
604 .cr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700605 .mr = 4,
606 };
607 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800608 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__sse,
609 .cr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700610 .mr = 9,
611 };
612 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800613 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x25__sse,
614 .cr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700615 .mr = 25,
616 };
617 xnn_params.f32.avgpool = (struct avgpool_parameters) {
618 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__sse,
619 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__sse,
620 .mr = 9,
621 .qr = 8,
622 };
623 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
624 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__sse,
625 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__sse,
626 .mr = 9,
627 .qr = 8,
628 };
629 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
630 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__sse,
631 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__sse,
632 .mr = 7,
633 };
634 xnn_params.f32.maxpool = (struct maxpool_parameters) {
635 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__sse,
636 .mr = 9,
637 .qr = 8,
638 };
639 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
640 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__sse2,
641 .mr = 4,
642 };
643 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
644 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__sse2,
645 .mr = 9,
646 };
647 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
648 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__sse2,
649 .mr = 9,
650 .qr = 8,
651 };
Marat Dukhan69722492019-11-11 19:55:50 -0800652 xnn_params.f32.bilinear = (struct bilinear_parameters) {
653 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__sse_c8,
654 .pixel_tile = 1,
655 .channel_tile = 8,
656 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700657 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__sse;
658 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__sse;
659 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800660 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__sse2_2x8,
661 .row_tile = 2,
662 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700663 };
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800664 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__sse_x8;
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700665 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -0800666 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__sse_2x,
667 .channel_tile = 4,
668 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700669 };
670 #ifndef XNN_NO_SPNCHW_OPERATORS
671 xnn_params.f32.spmm = (struct spmm_parameters) {
672 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__sse,
673 .mr = 4,
674 .nr = 1,
675 };
676 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
677 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__sse,
678 .input_width_tile = 4,
679 .output_width_tile = 4,
680 .output_height_tile = 1,
681 };
682 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
683 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse,
684 .input_width_tile = 4,
685 .output_width_tile = 4,
686 .output_height_tile = 1,
687 };
688 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
689 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__sse_x4,
690 .channel_tile = 4,
691 };
692 #endif // XNN_NO_SPNCHW_OPERATORS
693 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700694
695 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700696 #ifndef XNN_NO_X32_OPERATORS
697 xnn_params.x32.pad = (struct pad_parameters) {
698 .ukernel = xnn_x32_pad_x2__sse2,
699 .mr = 2,
700 };
701 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
702 xnn_params.x32.zip = (struct zip_parameters) {
703 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__sse2,
704 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__sse2,
705 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__sse2,
706 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__sse2,
707 };
708 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700709
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700710#elif XNN_ARCH_PNACL || XNN_ARCH_WASMSIMD
Marat Dukhan466b5232019-10-09 11:22:20 -0700711 // Unlike most other architectures, on x86/x86-64 when floating-point instructions
712 // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
713 // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
714 // of two infinities (must produce NaN per IEEE 754 standard).
715 static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
716 const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;
717
XNNPACK Teamb455b122019-09-27 18:10:33 -0700718 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700719 #ifndef XNN_NO_Q8_OPERATORS
720 xnn_params.q8.gemm = (struct gemm_parameters) {
721 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
722 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
723 .mr = 2,
724 .nr = 2,
725 };
726 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
727 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
728 .cr = 1,
729 .mr = 9,
730 };
731 xnn_params.q8.avgpool = (struct avgpool_parameters) {
732 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
733 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
734 .mr = 9,
735 .qr = 8,
736 };
737 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
738 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
739 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
740 .mr = 7,
741 };
742 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
743 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700744
745 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700746 #ifndef XNN_NO_U8_OPERATORS
747 xnn_params.u8.maxpool = (struct maxpool_parameters) {
748 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
749 .mr = 9,
750 .qr = 8,
751 };
752 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
753 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
754 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
755 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700756
757 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700758 #ifndef XNN_NO_X8_OPERATORS
759 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
760 xnn_params.x8.zip = (struct zip_parameters) {
761 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
762 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
763 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
764 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
765 };
766 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700767
768 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700769 #ifndef XNN_NO_F32_OPERATORS
770 if (is_wasm_x86) {
771 xnn_params.f32.gemm = (struct gemm_parameters) {
Marat Dukhancb801972019-10-23 02:10:33 -0700772 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__psimd_splat,
773 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__psimd_splat,
774 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__psimd_splat,
775 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__psimd_splat,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700776 .mr = 4,
777 .nr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700778 };
779 } else {
780 xnn_params.f32.gemm = (struct gemm_parameters) {
Marat Dukhancd945c62019-10-25 11:59:50 -0700781 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8s4__psimd,
782 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8s4__psimd,
783 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__psimd,
784 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__psimd,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700785 .mr = 6,
786 .nr = 8,
Marat Dukhancd945c62019-10-25 11:59:50 -0700787 .log2_sr = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700788 };
789 }
790 xnn_params.f32.gemm2 = (struct gemm_parameters) {
791 .gemm = NULL,
792 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__psimd,
Marat Dukhan466b5232019-10-09 11:22:20 -0700793 .mr = 4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700794 .nr = 2,
795 .log2_kr = 2,
Marat Dukhan466b5232019-10-09 11:22:20 -0700796 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700797 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800798 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700799 .cr = 4,
800 .mr = 4,
Marat Dukhan466b5232019-10-09 11:22:20 -0700801 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700802 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800803 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__psimd_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700804 .cr = 4,
805 .mr = 9,
806 };
807 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800808 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700809 .cr = 4,
810 .mr = 25,
811 };
812 xnn_params.f32.avgpool = (struct avgpool_parameters) {
813 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__psimd,
814 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__psimd,
815 .mr = 9,
816 .qr = 8,
817 };
818 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
819 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__psimd,
820 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__psimd,
821 .mr = 9,
822 .qr = 8,
823 };
824 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
825 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__psimd,
826 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__psimd,
827 .mr = 7,
828 };
829 xnn_params.f32.maxpool = (struct maxpool_parameters) {
830 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
831 .mr = 9,
832 .qr = 8,
833 };
834 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
835 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
836 .mr = 4,
837 };
838 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
839 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
840 .mr = 9,
841 };
842 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
843 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
844 .mr = 9,
845 .qr = 8,
846 };
Marat Dukhan69722492019-11-11 19:55:50 -0800847 xnn_params.f32.bilinear = (struct bilinear_parameters) {
848 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__psimd_c8,
849 .pixel_tile = 1,
850 .channel_tile = 8,
851 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700852 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__psimd;
853 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__psimd;
854 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800855 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__psimd_2x8,
856 .row_tile = 2,
857 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700858 };
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800859 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd_x8;
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700860 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -0800861 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__psimd_2x,
862 .channel_tile = 4,
863 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700864 };
865 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700866
867 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700868 #ifndef XNN_NO_X32_OPERATORS
869 xnn_params.x32.pad = (struct pad_parameters) {
870 .ukernel = xnn_x32_pad_x2__psimd,
871 .mr = 2,
872 };
873 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
874 xnn_params.x32.zip = (struct zip_parameters) {
875 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__psimd,
876 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__psimd,
877 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__psimd,
878 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__psimd,
879 };
880 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700881
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700882#elif XNN_ARCH_WASM || XNN_ARCH_ASMJS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700883 // Unlike most other architectures, on x86/x86-64 when floating-point instructions
884 // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
885 // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
886 // of two infinities (must produce NaN per IEEE 754 standard).
887 static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
888 const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;
889
890 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700891 #ifndef XNN_NO_Q8_OPERATORS
892 xnn_params.q8.gemm = (struct gemm_parameters) {
893 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
894 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
895 .mr = 2,
896 .nr = 2,
897 };
898 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
899 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
900 .cr = 1,
901 .mr = 9,
902 };
903 xnn_params.q8.avgpool = (struct avgpool_parameters) {
904 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
905 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
906 .mr = 9,
907 .qr = 8,
908 };
909 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
910 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
911 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
912 .mr = 7,
913 };
914 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
915 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700916
917 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700918 #ifndef XNN_NO_U8_OPERATORS
919 xnn_params.u8.maxpool = (struct maxpool_parameters) {
920 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
921 .mr = 9,
922 .qr = 8,
923 };
924 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
925 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
926 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
927 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700928
929 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700930 #ifndef XNN_NO_X8_OPERATORS
931 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
932 xnn_params.x8.zip = (struct zip_parameters) {
933 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
934 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
935 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
936 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
937 };
938 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700939
940 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700941 #ifndef XNN_NO_F32_OPERATORS
942 if (is_wasm_x86) {
943 xnn_params.f32.gemm = (struct gemm_parameters) {
944 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_2x4__scalar,
945 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_2x4__scalar,
946 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
947 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
948 .mr = 2,
949 .nr = 4,
950 };
951 } else {
952 xnn_params.f32.gemm = (struct gemm_parameters) {
953 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x4__scalar,
954 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x4__scalar,
955 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
956 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
957 .mr = 4,
958 .nr = 4,
959 };
960 }
961 xnn_params.f32.gemm2 = (struct gemm_parameters) {
962 .gemm = NULL,
963 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__scalar,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700964 .mr = 4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700965 .nr = 2,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700966 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700967 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800968 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x4__scalar_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700969 .cr = 1,
970 .mr = 4,
971 };
972 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800973 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x9__scalar_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700974 .cr = 1,
975 .mr = 9,
976 };
977 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800978 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x25__scalar_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700979 .cr = 1,
980 .mr = 25,
981 };
982 xnn_params.f32.avgpool = (struct avgpool_parameters) {
983 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__scalar,
984 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__scalar,
985 .mr = 9,
986 .qr = 8,
987 };
988 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
989 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__scalar,
990 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__scalar,
991 .mr = 9,
992 .qr = 8,
993 };
994 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
995 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__scalar,
996 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__scalar,
997 .mr = 7,
998 };
999 xnn_params.f32.maxpool = (struct maxpool_parameters) {
1000 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__scalar,
1001 .mr = 9,
1002 .qr = 8,
1003 };
1004 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
1005 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__scalar,
1006 .mr = 4,
1007 };
1008 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
1009 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__scalar,
1010 .mr = 9,
1011 };
1012 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
1013 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__scalar,
1014 .mr = 9,
1015 .qr = 8,
1016 };
Marat Dukhan69722492019-11-11 19:55:50 -08001017 xnn_params.f32.bilinear = (struct bilinear_parameters) {
1018 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__scalar_c2,
1019 .pixel_tile = 1,
1020 .channel_tile = 2,
1021 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001022 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__scalar;
1023 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__scalar;
1024 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001025 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__scalar_2x4,
1026 .row_tile = 4,
1027 .channel_tile = 4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001028 };
Marat Dukhanc07cb7f2019-11-14 15:32:05 -08001029 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__scalar_x4;
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001030 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -08001031 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c1__scalar_2x,
1032 .channel_tile = 1,
1033 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001034 };
1035 #ifndef XNN_NO_SPNCHW_OPERATORS
1036 xnn_params.f32.spmm = (struct spmm_parameters) {
Marat Dukhanbff791e2019-10-24 11:05:37 -07001037 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x1__scalar,
1038 .mr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001039 .nr = 1,
1040 };
Erich Elsenc6afd9b2019-10-24 16:10:53 -07001041 xnn_params.f32.spmm2 = (struct spmm_parameters) {
1042 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x2__scalar,
1043 .mr = 8,
1044 .nr = 2,
1045 };
1046 xnn_params.f32.spmm4 = (struct spmm_parameters) {
1047 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x4__scalar,
1048 .mr = 8,
1049 .nr = 4,
1050 };
Marat Dukhan14fe0b22019-10-23 21:20:07 -07001051 xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
1052 .ukernel_with_symm_padding =
1053 (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1,
1054 .output_channel_tile = 4,
1055 .output_height_tile = 1,
1056 .output_width_tile = 1,
1057 };
1058 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
1059 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar,
1060 .input_width_tile = 1,
1061 .output_width_tile = 1,
1062 .output_height_tile = 1,
1063 };
1064 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
1065 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar,
1066 .input_width_tile = 1,
1067 .output_width_tile = 1,
1068 .output_height_tile = 1,
1069 };
1070 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
1071 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__scalar_x1,
1072 .channel_tile = 1,
1073 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001074 #endif // XNN_NO_SPNCHW_OPERATORS
1075 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001076
1077 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001078 #ifndef XNN_NO_X32_OPERATORS
1079 xnn_params.x32.pad = (struct pad_parameters) {
1080 .ukernel = xnn_x32_pad_x2__scalar,
1081 .mr = 2,
1082 };
1083 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__scalar;
1084 xnn_params.x32.zip = (struct zip_parameters) {
1085 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__scalar,
1086 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__scalar,
1087 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__scalar,
1088 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__scalar,
1089 };
1090 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001091
1092#else
1093 #error "Unsupported architecture"
1094#endif
1095 xnn_params.initialized = true;
1096}
1097
1098enum xnn_status xnn_initialize(void) {
Marat Dukhand343c222019-10-07 09:22:14 -07001099 #ifndef __EMSCRIPTEN__
1100 if (!cpuinfo_initialize()) {
1101 return xnn_status_out_of_memory;
1102 }
1103 #endif
XNNPACK Teamb455b122019-09-27 18:10:33 -07001104 pthread_once(&init_guard, &init);
1105 if (xnn_params.initialized) {
1106 return xnn_status_success;
1107 } else {
1108 return xnn_status_unsupported_hardware;
1109 }
1110}
1111
1112enum xnn_status xnn_deinitialize(void) {
Marat Dukhand343c222019-10-07 09:22:14 -07001113 #ifndef __EMSCRIPTEN__
1114 cpuinfo_deinitialize();
1115 #endif
XNNPACK Teamb455b122019-09-27 18:10:33 -07001116 return xnn_status_success;
1117}