blob: de419d8a85fb2ab1cf1527d334d71a2f2b645b5d [file] [log] [blame]
XNNPACK Teamb455b122019-09-27 18:10:33 -07001// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2019 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8
9#include <stdbool.h>
10#include <stddef.h>
11#include <stdint.h>
12
13#include <pthread.h>
14
Marat Dukhand343c222019-10-07 09:22:14 -070015#ifndef __EMSCRIPTEN__
16 #include <cpuinfo.h>
17#endif
XNNPACK Teamb455b122019-09-27 18:10:33 -070018
19#include <xnnpack.h>
20#include <xnnpack/argmaxpool.h>
21#include <xnnpack/avgpool.h>
Marat Dukhan69722492019-11-11 19:55:50 -080022#include <xnnpack/bilinear.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070023#include <xnnpack/clamp.h>
Marat Dukhan1dadbf72019-10-01 10:46:20 -070024#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070025#include <xnnpack/conv.h>
26#include <xnnpack/dwconv.h>
27#include <xnnpack/gavgpool.h>
28#include <xnnpack/gemm.h>
29#include <xnnpack/hswish.h>
30#include <xnnpack/igemm.h>
31#include <xnnpack/log.h>
32#include <xnnpack/lut.h>
33#include <xnnpack/maxpool.h>
34#include <xnnpack/pad.h>
35#include <xnnpack/params.h>
36#include <xnnpack/pavgpool.h>
37#include <xnnpack/prelu.h>
38#include <xnnpack/rmax.h>
39#include <xnnpack/spmm.h>
40#include <xnnpack/unpool.h>
41#include <xnnpack/vadd.h>
Marat Dukhanc07cb7f2019-11-14 15:32:05 -080042#include <xnnpack/vbinop.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070043#include <xnnpack/vmulcaddc.h>
Marat Dukhan346a9e52019-11-15 09:06:30 -080044#include <xnnpack/vunop.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070045#include <xnnpack/zip.h>
46
// Default XNN_ENABLE_ASSEMBLY to 1 unless the build has already defined it.
// init() tests this macro to choose between the hand-written assembly
// micro-kernels (names containing "aarch32"/"aarch64") and the
// intrinsics-based ones for the same operation.
47#ifndef XNN_ENABLE_ASSEMBLY
48 #define XNN_ENABLE_ASSEMBLY 1
49#endif
50
// One-time-initialization control object, statically set to PTHREAD_ONCE_INIT.
// NOTE(review): presumably handed to pthread_once() together with init(); the
// call site is not visible in this part of the file - confirm.
51static pthread_once_t init_guard = PTHREAD_ONCE_INIT;
52
// Global parameter table. It starts out with .initialized = false; init()
// assigns the per-datatype entries (q8, u8, x8, f32, x32) with micro-kernel
// function pointers and their numeric parameters (mr, nr, cr, ...) for the
// architecture selected at compile time. init() returns early, leaving the
// table unpopulated, when the required ISA (NEON on ARM, SSE2 on x86) is
// reported as unsupported.
// NOTE(review): where .initialized is switched to true is not visible in this
// part of the file - confirm before relying on it.
53struct xnn_parameters xnn_params = {
54 .initialized = false
55};
56
// External stub routines declared only for web targets. Both take two
// uint32_t arguments and return a uint32_t.
// The "sub" stub is declared for PNaCl, asm.js, WebAssembly and WebAssembly
// SIMD builds; the "min" stub is declared for the same set except asm.js.
// NOTE(review): how these stubs are used is not visible in this part of the
// file - presumably for probing the runtime's f32 behaviour; confirm.
Marat Dukhan1dadbf72019-10-01 10:46:20 -070057#if XNN_ARCH_PNACL || XNN_ARCH_ASMJS || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
XNNPACK Teamb455b122019-09-27 18:10:33 -070058 extern uint32_t xnn_stub_wasm_f32_sub(uint32_t a, uint32_t b);
59#endif
Marat Dukhan1dadbf72019-10-01 10:46:20 -070060#if XNN_ARCH_PNACL || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
XNNPACK Teamb455b122019-09-27 18:10:33 -070061 extern uint32_t xnn_stub_wasm_f32_min(uint32_t a, uint32_t b);
62#endif
63
64static void init(void) {
Marat Dukhan1dadbf72019-10-01 10:46:20 -070065#if XNN_ARCH_ARM
XNNPACK Teamb455b122019-09-27 18:10:33 -070066 if (!cpuinfo_has_arm_neon()) {
67 xnn_log_error("XNNPACK initialization failed: NEON is not supported");
68 return;
69 }
70
71 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -070072 #ifndef XNN_NO_Q8_OPERATORS
73 xnn_params.q8.gemm = (struct gemm_parameters) {
74 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x8__neon,
75 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x8__neon,
76 .mr = 4,
77 .nr = 8,
78 };
XNNPACK Teamb455b122019-09-27 18:10:33 -070079
Marat Dukhan8fe54e42019-10-10 14:12:59 -070080 #if XNN_ENABLE_ASSEMBLY
81 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
82 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__aarch32_neon,
83 .cr = 8,
84 .mr = 9,
85 };
86 #else
87 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
88 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
89 .cr = 8,
90 .mr = 9,
91 };
92 #endif
93 xnn_params.q8.avgpool = (struct avgpool_parameters) {
94 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
95 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
96 .mr = 9,
97 .qr = 8,
98 };
99 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
100 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
101 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
102 .mr = 7,
103 };
104 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
105 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700106
107 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700108 #ifndef XNN_NO_U8_OPERATORS
109 xnn_params.u8.maxpool = (struct maxpool_parameters) {
110 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
111 .mr = 9,
112 .qr = 8,
113 };
114 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
115 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
116 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
117 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700118
119 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700120 #ifndef XNN_NO_X8_OPERATORS
121 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
122 xnn_params.x8.zip = (struct zip_parameters) {
123 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
124 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
125 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
126 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
127 };
128 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700129
130 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700131 #ifndef XNN_NO_F32_OPERATORS
132 xnn_params.f32.gemm = (struct gemm_parameters) {
133 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neon_ld128,
134 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neon_ld128,
135 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_ld64,
136 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_ld64,
137 .mr = 4,
138 .nr = 8,
139 };
140 xnn_params.f32.gemm2 = (struct gemm_parameters) {
141 .gemm = NULL,
142 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neon_ld64,
143 .mr = 4,
144 .nr = 2,
145 };
146 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
147 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
148 .cr = 4,
149 .mr = 4,
150 };
151 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
152 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neon,
153 .cr = 4,
154 .mr = 9,
155 };
156 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
157 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
158 .cr = 4,
159 .mr = 25,
160 };
161 xnn_params.f32.avgpool = (struct avgpool_parameters) {
162 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
163 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
164 .mr = 9,
165 .qr = 8,
166 };
167 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
168 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
169 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
170 .mr = 9,
171 .qr = 8,
172 };
173 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
174 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
175 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
176 .mr = 7,
177 };
178 xnn_params.f32.maxpool = (struct maxpool_parameters) {
179 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
180 .mr = 9,
181 .qr = 8,
182 };
183 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
184 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
185 .mr = 4,
186 };
187 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
188 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
189 .mr = 9,
190 };
191 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
192 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
193 .mr = 9,
194 .qr = 8,
195 };
Marat Dukhan69722492019-11-11 19:55:50 -0800196 xnn_params.f32.bilinear = (struct bilinear_parameters) {
197 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__neon_c8,
198 .pixel_tile = 1,
199 .channel_tile = 8,
200 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700201 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
202 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neon;
203 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800204 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__neon_2x8,
205 .row_tile = 2,
206 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700207 };
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800208 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__neon_x8;
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700209 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -0800210 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neon_2x,
211 .channel_tile = 4,
212 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700213 };
214 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700215
216 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700217 #ifndef XNN_NO_X32_OPERATORS
218 xnn_params.x32.pad = (struct pad_parameters) {
219 .ukernel = xnn_x32_pad_x2__neon,
220 .mr = 2,
221 };
222 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
223 xnn_params.x32.zip = (struct zip_parameters) {
224 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
225 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
226 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
227 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
228 };
229 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700230
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700231#elif XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700232
233 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700234 #ifndef XNN_NO_Q8_OPERATORS
235 xnn_params.q8.gemm = (struct gemm_parameters) {
236 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_8x8__neon,
237 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_8x8__neon,
238 .mr = 8,
239 .nr = 8,
240 };
241 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
242 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
243 .cr = 8,
244 .mr = 9,
245 };
246 xnn_params.q8.avgpool = (struct avgpool_parameters) {
247 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
248 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
249 .mr = 9,
250 .qr = 8,
251 };
252 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
253 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
254 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
255 .mr = 7,
256 };
257 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
258 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700259
260 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700261 #ifndef XNN_NO_U8_OPERATORS
262 xnn_params.u8.maxpool = (struct maxpool_parameters) {
263 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
264 .mr = 9,
265 .qr = 8,
266 };
267 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
268 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
269 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
270 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700271
272 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700273 #ifndef XNN_NO_X8_OPERATORS
274 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
275 xnn_params.x8.zip = (struct zip_parameters) {
276 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
277 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
278 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
279 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
280 };
281 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700282
283 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700284 #ifndef XNN_NO_F32_OPERATORS
285 #if XNN_ENABLE_ASSEMBLY
286 switch (cpuinfo_get_core(0)->uarch) {
287 case cpuinfo_uarch_kryo:
288 xnn_params.f32.gemm = (struct gemm_parameters) {
289 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a57,
290 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
291 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
292 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
293 .mr = 4,
294 .nr = 8,
295 };
296 break;
297 case cpuinfo_uarch_cortex_a57:
298 xnn_params.f32.gemm = (struct gemm_parameters) {
299 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
300 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
301 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
302 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
303 .mr = 6,
304 .nr = 8,
305 };
306 break;
307 case cpuinfo_uarch_cortex_a72:
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700308 xnn_params.f32.gemm = (struct gemm_parameters) {
309 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
310 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
311 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
312 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
313 .mr = 4,
314 .nr = 8,
315 };
316 break;
317 case cpuinfo_uarch_cortex_a75:
Frank Barchard263bb092019-10-28 15:28:46 -0700318 case cpuinfo_uarch_cortex_a76:
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700319 case cpuinfo_uarch_mongoose_m1:
320 case cpuinfo_uarch_mongoose_m2:
321 case cpuinfo_uarch_meerkat_m3:
322 case (cpuinfo_uarch_meerkat_m3 + 1):
323 xnn_params.f32.gemm = (struct gemm_parameters) {
324 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
325 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
326 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
327 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
328 .mr = 6,
329 .nr = 8,
330 };
331 break;
332 case cpuinfo_uarch_cortex_a53:
333 case cpuinfo_uarch_cortex_a55:
334 xnn_params.f32.gemm = (struct gemm_parameters) {
Frank Barchardbd1d5d92019-10-30 15:53:30 -0700335 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a53,
336 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a53,
337 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a53,
338 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a53,
339 .mr = 6,
340 .nr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700341 };
342 break;
343 case cpuinfo_uarch_cortex_a73:
344 xnn_params.f32.gemm = (struct gemm_parameters) {
345 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
346 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
347 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
348 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
349 .mr = 6,
350 .nr = 8,
351 };
352 break;
353 default:
354 xnn_params.f32.gemm = (struct gemm_parameters) {
Frank Barchard2af471b2019-10-16 19:10:32 -0700355 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__neonfma_ld64,
356 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__neonfma_ld64,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700357 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
358 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
Frank Barchard2af471b2019-10-16 19:10:32 -0700359 .mr = 6,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700360 .nr = 8,
361 };
362 break;
363 }
364 #else // XNN_ENABLE_ASSEMBLY
XNNPACK Teamb455b122019-09-27 18:10:33 -0700365 xnn_params.f32.gemm = (struct gemm_parameters) {
Frank Barchard2af471b2019-10-16 19:10:32 -0700366 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__neonfma_ld64,
367 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__neonfma_ld64,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700368 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neonfma_ld64,
369 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neonfma_ld64,
Frank Barchard2af471b2019-10-16 19:10:32 -0700370 .mr = 6,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700371 .nr = 8,
372 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700373 #endif
XNNPACK Teamb455b122019-09-27 18:10:33 -0700374
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700375 xnn_params.f32.gemm2 = (struct gemm_parameters) {
376 .gemm = NULL,
377 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neonfma_ld64,
378 .mr = 4,
379 .nr = 2,
380 };
381 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
382 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
383 .cr = 4,
384 .mr = 4,
385 };
386 switch (cpuinfo_get_core(0)->uarch) {
387 case cpuinfo_uarch_kryo:
388 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
389 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neonfma,
390 .cr = 4,
391 .mr = 9,
392 };
393 break;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700394#if XNN_ENABLE_ASSEMBLY
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700395 case cpuinfo_uarch_cortex_a53:
396 case cpuinfo_uarch_cortex_a55:
397 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
398 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55,
399 .cr = 4,
400 .mr = 9,
401 };
402 break;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700403#endif
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700404 default:
405 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
406 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__neonfma,
407 .cr = 8,
408 .mr = 9,
409 };
410 break;
411 }
412 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
413 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
414 .cr = 4,
415 .mr = 25,
416 };
417 xnn_params.f32.avgpool = (struct avgpool_parameters) {
418 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
419 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
420 .mr = 9,
421 .qr = 8,
422 };
423 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
424 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
425 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
426 .mr = 9,
427 .qr = 8,
428 };
429 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
430 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
431 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
432 .mr = 7,
433 };
434 xnn_params.f32.maxpool = (struct maxpool_parameters) {
435 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
436 .mr = 9,
437 .qr = 8,
438 };
439 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
440 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
441 .mr = 4,
442 };
443 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
444 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
445 .mr = 9,
446 };
447 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
448 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
449 .mr = 9,
450 .qr = 8,
451 };
Marat Dukhan69722492019-11-11 19:55:50 -0800452 xnn_params.f32.bilinear = (struct bilinear_parameters) {
453 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__neonfma_c8,
454 .pixel_tile = 1,
455 .channel_tile = 8,
456 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700457 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
458 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neonfma;
Marat Dukhan346a9e52019-11-15 09:06:30 -0800459 xnn_params.f32.sigmoid = (xnn_univector_ukernel_function) xnn_f32_sigmoid_ukernel__neonfma_p5_x16;
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700460 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800461 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__neon_2x8,
462 .row_tile = 2,
463 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700464 };
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800465 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__neon_x8;
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700466 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -0800467 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neonfma_2x,
468 .channel_tile = 4,
469 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700470 };
471 #ifndef XNN_NO_SPNCHW_OPERATORS
472 xnn_params.f32.spmm = (struct spmm_parameters) {
Erich Elsen9cdade32019-10-16 05:26:59 -0700473 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x1__neonfma_pipelined,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700474 .mr = 16,
475 .nr = 1,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700476 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700477 xnn_params.f32.spmm2 = (struct spmm_parameters) {
478 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x2__neonfma,
479 .mr = 16,
480 .nr = 2,
481 };
482 xnn_params.f32.spmm4 = (struct spmm_parameters) {
483 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x4__neonfma,
484 .mr = 16,
485 .nr = 4,
486 };
487 xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
488 .ukernel_with_symm_padding =
489 (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2,
490 .output_channel_tile = 4,
491 .output_height_tile = 2,
492 .output_width_tile = 2,
493 };
494 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
495 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma,
496 .input_width_tile = 4,
497 .output_width_tile = 4,
498 .output_height_tile = 3,
499 };
500 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
501 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma,
502 .input_width_tile = 4,
503 .output_width_tile = 4,
504 .output_height_tile = 1,
505 };
Marat Dukhana99918a2019-11-15 14:40:12 -0800506 xnn_params.f32.spchw_dwconv5x5 = (struct spchw_dwconv_parameters) {
507 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma,
508 .input_width_tile = 4,
509 .output_width_tile = 4,
510 .output_height_tile = 1,
511 };
512 xnn_params.f32.spchw_dwconv5x5s2 = (struct spchw_dwconv_parameters) {
513 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma,
514 .input_width_tile = 4,
515 .output_width_tile = 4,
516 .output_height_tile = 1,
517 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700518 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
519 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__neon_x4,
520 .channel_tile = 4,
521 };
522 #endif // XNN_NO_SPNCHW_OPERATORS
523 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700524
525 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700526 #ifndef XNN_NO_X32_OPERATORS
527 xnn_params.x32.pad = (struct pad_parameters) {
528 .ukernel = xnn_x32_pad_x2__neon,
529 .mr = 2,
530 };
531 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
532 xnn_params.x32.zip = (struct zip_parameters) {
533 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
534 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
535 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
536 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
537 };
538 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700539
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700540#elif XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700541 if (!cpuinfo_has_x86_sse2()) {
542 xnn_log_error("XNNPACK initialization failed: SSE2 is not supported");
543 return;
544 }
545
546 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700547 #ifndef XNN_NO_Q8_OPERATORS
548 xnn_params.q8.gemm = (struct gemm_parameters) {
549 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x4c2__sse2,
550 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x4c2__sse2,
551 .mr = 4,
552 .nr = 4,
553 .log2_kr = 1,
554 };
555 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
556 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__sse2,
557 .cr = 8,
558 .mr = 9,
559 };
560 xnn_params.q8.avgpool = (struct avgpool_parameters) {
561 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__sse2,
562 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__sse2,
563 .mr = 9,
564 .qr = 8,
565 };
566 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
567 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__sse2,
568 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__sse2,
569 .mr = 7,
570 };
571 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__sse2;
572 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700573
574 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700575 #ifndef XNN_NO_U8_OPERATORS
576 xnn_params.u8.maxpool = (struct maxpool_parameters) {
577 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__sse2,
578 .mr = 9,
579 .qr = 8,
580 };
581 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__sse2;
582 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
583 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__sse2;
584 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700585
586 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700587 #ifndef XNN_NO_X8_OPERATORS
588 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
589 xnn_params.x8.zip = (struct zip_parameters) {
590 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__sse2,
591 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__sse2,
592 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__sse2,
593 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__sse2,
594 };
595 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700596
597 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700598 #ifndef XNN_NO_F32_OPERATORS
599 xnn_params.f32.gemm = (struct gemm_parameters) {
600 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__sse_load1,
601 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__sse_load1,
602 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__sse_load1,
603 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__sse_load1,
604 .mr = 4,
605 .nr = 8,
606 };
607 xnn_params.f32.gemm2 = (struct gemm_parameters) {
608 .gemm = NULL,
609 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__sse,
610 .mr = 4,
611 .nr = 2,
612 .log2_kr = 2,
613 };
614 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800615 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x4__sse,
616 .cr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700617 .mr = 4,
618 };
619 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800620 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__sse,
621 .cr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700622 .mr = 9,
623 };
624 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800625 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x25__sse,
626 .cr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700627 .mr = 25,
628 };
629 xnn_params.f32.avgpool = (struct avgpool_parameters) {
630 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__sse,
631 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__sse,
632 .mr = 9,
633 .qr = 8,
634 };
635 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
636 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__sse,
637 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__sse,
638 .mr = 9,
639 .qr = 8,
640 };
641 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
642 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__sse,
643 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__sse,
644 .mr = 7,
645 };
646 xnn_params.f32.maxpool = (struct maxpool_parameters) {
647 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__sse,
648 .mr = 9,
649 .qr = 8,
650 };
651 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
652 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__sse2,
653 .mr = 4,
654 };
655 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
656 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__sse2,
657 .mr = 9,
658 };
659 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
660 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__sse2,
661 .mr = 9,
662 .qr = 8,
663 };
Marat Dukhan69722492019-11-11 19:55:50 -0800664 xnn_params.f32.bilinear = (struct bilinear_parameters) {
665 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__sse_c8,
666 .pixel_tile = 1,
667 .channel_tile = 8,
668 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700669 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__sse;
670 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__sse;
671 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800672 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__sse2_2x8,
673 .row_tile = 2,
674 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700675 };
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800676 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__sse_x8;
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700677 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -0800678 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__sse_2x,
679 .channel_tile = 4,
680 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700681 };
682 #ifndef XNN_NO_SPNCHW_OPERATORS
683 xnn_params.f32.spmm = (struct spmm_parameters) {
684 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__sse,
685 .mr = 4,
686 .nr = 1,
687 };
688 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
689 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__sse,
690 .input_width_tile = 4,
691 .output_width_tile = 4,
692 .output_height_tile = 1,
693 };
694 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
695 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse,
696 .input_width_tile = 4,
697 .output_width_tile = 4,
698 .output_height_tile = 1,
699 };
700 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
701 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__sse_x4,
702 .channel_tile = 4,
703 };
704 #endif // XNN_NO_SPNCHW_OPERATORS
705 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700706
707 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700708 #ifndef XNN_NO_X32_OPERATORS
709 xnn_params.x32.pad = (struct pad_parameters) {
710 .ukernel = xnn_x32_pad_x2__sse2,
711 .mr = 2,
712 };
713 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
714 xnn_params.x32.zip = (struct zip_parameters) {
715 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__sse2,
716 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__sse2,
717 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__sse2,
718 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__sse2,
719 };
720 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700721
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700722#elif XNN_ARCH_PNACL || XNN_ARCH_WASMSIMD
Marat Dukhan466b5232019-10-09 11:22:20 -0700723 // Unlike most other architectures, on x86/x86-64 when floating-point instructions
724 // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
725 // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
726 // of two infinities (must produce NaN per IEEE 754 standard).
727 static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
728 const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;
729
XNNPACK Teamb455b122019-09-27 18:10:33 -0700730 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700731 #ifndef XNN_NO_Q8_OPERATORS
732 xnn_params.q8.gemm = (struct gemm_parameters) {
733 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
734 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
735 .mr = 2,
736 .nr = 2,
737 };
738 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
739 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
740 .cr = 1,
741 .mr = 9,
742 };
743 xnn_params.q8.avgpool = (struct avgpool_parameters) {
744 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
745 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
746 .mr = 9,
747 .qr = 8,
748 };
749 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
750 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
751 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
752 .mr = 7,
753 };
754 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
755 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700756
757 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700758 #ifndef XNN_NO_U8_OPERATORS
759 xnn_params.u8.maxpool = (struct maxpool_parameters) {
760 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
761 .mr = 9,
762 .qr = 8,
763 };
764 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
765 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
766 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
767 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700768
769 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700770 #ifndef XNN_NO_X8_OPERATORS
771 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
772 xnn_params.x8.zip = (struct zip_parameters) {
773 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
774 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
775 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
776 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
777 };
778 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700779
780 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700781 #ifndef XNN_NO_F32_OPERATORS
782 if (is_wasm_x86) {
783 xnn_params.f32.gemm = (struct gemm_parameters) {
Marat Dukhancb801972019-10-23 02:10:33 -0700784 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__psimd_splat,
785 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__psimd_splat,
786 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__psimd_splat,
787 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__psimd_splat,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700788 .mr = 4,
789 .nr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700790 };
791 } else {
792 xnn_params.f32.gemm = (struct gemm_parameters) {
Marat Dukhancd945c62019-10-25 11:59:50 -0700793 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8s4__psimd,
794 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8s4__psimd,
795 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__psimd,
796 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__psimd,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700797 .mr = 6,
798 .nr = 8,
Marat Dukhancd945c62019-10-25 11:59:50 -0700799 .log2_sr = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700800 };
801 }
802 xnn_params.f32.gemm2 = (struct gemm_parameters) {
803 .gemm = NULL,
804 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__psimd,
Marat Dukhan466b5232019-10-09 11:22:20 -0700805 .mr = 4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700806 .nr = 2,
807 .log2_kr = 2,
Marat Dukhan466b5232019-10-09 11:22:20 -0700808 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700809 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800810 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700811 .cr = 4,
812 .mr = 4,
Marat Dukhan466b5232019-10-09 11:22:20 -0700813 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700814 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800815 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__psimd_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700816 .cr = 4,
817 .mr = 9,
818 };
819 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800820 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700821 .cr = 4,
822 .mr = 25,
823 };
824 xnn_params.f32.avgpool = (struct avgpool_parameters) {
825 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__psimd,
826 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__psimd,
827 .mr = 9,
828 .qr = 8,
829 };
830 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
831 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__psimd,
832 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__psimd,
833 .mr = 9,
834 .qr = 8,
835 };
836 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
837 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__psimd,
838 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__psimd,
839 .mr = 7,
840 };
841 xnn_params.f32.maxpool = (struct maxpool_parameters) {
842 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
843 .mr = 9,
844 .qr = 8,
845 };
846 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
847 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
848 .mr = 4,
849 };
850 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
851 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
852 .mr = 9,
853 };
854 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
855 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
856 .mr = 9,
857 .qr = 8,
858 };
Marat Dukhan69722492019-11-11 19:55:50 -0800859 xnn_params.f32.bilinear = (struct bilinear_parameters) {
860 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__psimd_c8,
861 .pixel_tile = 1,
862 .channel_tile = 8,
863 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700864 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__psimd;
865 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__psimd;
866 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800867 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__psimd_2x8,
868 .row_tile = 2,
869 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700870 };
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800871 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd_x8;
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700872 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -0800873 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__psimd_2x,
874 .channel_tile = 4,
875 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700876 };
877 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700878
879 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700880 #ifndef XNN_NO_X32_OPERATORS
881 xnn_params.x32.pad = (struct pad_parameters) {
882 .ukernel = xnn_x32_pad_x2__psimd,
883 .mr = 2,
884 };
885 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
886 xnn_params.x32.zip = (struct zip_parameters) {
887 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__psimd,
888 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__psimd,
889 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__psimd,
890 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__psimd,
891 };
892 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700893
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700894#elif XNN_ARCH_WASM || XNN_ARCH_ASMJS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700895 // Unlike most other architectures, on x86/x86-64 when floating-point instructions
896 // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
897 // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
898 // of two infinities (must produce NaN per IEEE 754 standard).
899 static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
900 const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;
901
902 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700903 #ifndef XNN_NO_Q8_OPERATORS
904 xnn_params.q8.gemm = (struct gemm_parameters) {
905 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
906 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
907 .mr = 2,
908 .nr = 2,
909 };
910 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
911 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
912 .cr = 1,
913 .mr = 9,
914 };
915 xnn_params.q8.avgpool = (struct avgpool_parameters) {
916 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
917 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
918 .mr = 9,
919 .qr = 8,
920 };
921 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
922 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
923 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
924 .mr = 7,
925 };
926 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
927 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700928
929 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700930 #ifndef XNN_NO_U8_OPERATORS
931 xnn_params.u8.maxpool = (struct maxpool_parameters) {
932 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
933 .mr = 9,
934 .qr = 8,
935 };
936 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
937 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
938 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
939 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700940
941 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700942 #ifndef XNN_NO_X8_OPERATORS
943 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
944 xnn_params.x8.zip = (struct zip_parameters) {
945 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
946 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
947 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
948 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
949 };
950 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700951
952 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700953 #ifndef XNN_NO_F32_OPERATORS
954 if (is_wasm_x86) {
955 xnn_params.f32.gemm = (struct gemm_parameters) {
956 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_2x4__scalar,
957 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_2x4__scalar,
958 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
959 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
960 .mr = 2,
961 .nr = 4,
962 };
963 } else {
964 xnn_params.f32.gemm = (struct gemm_parameters) {
965 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x4__scalar,
966 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x4__scalar,
967 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
968 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
969 .mr = 4,
970 .nr = 4,
971 };
972 }
973 xnn_params.f32.gemm2 = (struct gemm_parameters) {
974 .gemm = NULL,
975 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__scalar,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700976 .mr = 4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700977 .nr = 2,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700978 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700979 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800980 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x4__scalar_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700981 .cr = 1,
982 .mr = 4,
983 };
984 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800985 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x9__scalar_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700986 .cr = 1,
987 .mr = 9,
988 };
989 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800990 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x25__scalar_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700991 .cr = 1,
992 .mr = 25,
993 };
994 xnn_params.f32.avgpool = (struct avgpool_parameters) {
995 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__scalar,
996 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__scalar,
997 .mr = 9,
998 .qr = 8,
999 };
1000 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
1001 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__scalar,
1002 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__scalar,
1003 .mr = 9,
1004 .qr = 8,
1005 };
1006 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
1007 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__scalar,
1008 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__scalar,
1009 .mr = 7,
1010 };
1011 xnn_params.f32.maxpool = (struct maxpool_parameters) {
1012 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__scalar,
1013 .mr = 9,
1014 .qr = 8,
1015 };
1016 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
1017 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__scalar,
1018 .mr = 4,
1019 };
1020 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
1021 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__scalar,
1022 .mr = 9,
1023 };
1024 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
1025 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__scalar,
1026 .mr = 9,
1027 .qr = 8,
1028 };
Marat Dukhan69722492019-11-11 19:55:50 -08001029 xnn_params.f32.bilinear = (struct bilinear_parameters) {
1030 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__scalar_c2,
1031 .pixel_tile = 1,
1032 .channel_tile = 2,
1033 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001034 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__scalar;
1035 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__scalar;
1036 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001037 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__scalar_2x4,
1038 .row_tile = 4,
1039 .channel_tile = 4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001040 };
Marat Dukhanc07cb7f2019-11-14 15:32:05 -08001041 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__scalar_x4;
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001042 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -08001043 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c1__scalar_2x,
1044 .channel_tile = 1,
1045 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001046 };
1047 #ifndef XNN_NO_SPNCHW_OPERATORS
1048 xnn_params.f32.spmm = (struct spmm_parameters) {
Marat Dukhanbff791e2019-10-24 11:05:37 -07001049 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x1__scalar,
1050 .mr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001051 .nr = 1,
1052 };
Erich Elsenc6afd9b2019-10-24 16:10:53 -07001053 xnn_params.f32.spmm2 = (struct spmm_parameters) {
1054 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x2__scalar,
1055 .mr = 8,
1056 .nr = 2,
1057 };
1058 xnn_params.f32.spmm4 = (struct spmm_parameters) {
1059 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x4__scalar,
1060 .mr = 8,
1061 .nr = 4,
1062 };
Marat Dukhan14fe0b22019-10-23 21:20:07 -07001063 xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
1064 .ukernel_with_symm_padding =
1065 (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1,
1066 .output_channel_tile = 4,
1067 .output_height_tile = 1,
1068 .output_width_tile = 1,
1069 };
1070 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
1071 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar,
1072 .input_width_tile = 1,
1073 .output_width_tile = 1,
1074 .output_height_tile = 1,
1075 };
1076 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
1077 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar,
1078 .input_width_tile = 1,
1079 .output_width_tile = 1,
1080 .output_height_tile = 1,
1081 };
Marat Dukhana99918a2019-11-15 14:40:12 -08001082 xnn_params.f32.spchw_dwconv5x5 = (struct spchw_dwconv_parameters) {
1083 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar,
1084 .input_width_tile = 1,
1085 .output_width_tile = 1,
1086 .output_height_tile = 1,
1087 };
1088 xnn_params.f32.spchw_dwconv5x5s2 = (struct spchw_dwconv_parameters) {
1089 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar,
1090 .input_width_tile = 1,
1091 .output_width_tile = 1,
1092 .output_height_tile = 1,
1093 };
Marat Dukhan14fe0b22019-10-23 21:20:07 -07001094 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
1095 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__scalar_x1,
1096 .channel_tile = 1,
1097 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001098 #endif // XNN_NO_SPNCHW_OPERATORS
1099 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001100
1101 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001102 #ifndef XNN_NO_X32_OPERATORS
1103 xnn_params.x32.pad = (struct pad_parameters) {
1104 .ukernel = xnn_x32_pad_x2__scalar,
1105 .mr = 2,
1106 };
1107 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__scalar;
1108 xnn_params.x32.zip = (struct zip_parameters) {
1109 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__scalar,
1110 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__scalar,
1111 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__scalar,
1112 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__scalar,
1113 };
1114 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001115
1116#else
1117 #error "Unsupported architecture"
1118#endif
1119 xnn_params.initialized = true;
1120}
1121
1122enum xnn_status xnn_initialize(void) {
Marat Dukhand343c222019-10-07 09:22:14 -07001123 #ifndef __EMSCRIPTEN__
1124 if (!cpuinfo_initialize()) {
1125 return xnn_status_out_of_memory;
1126 }
1127 #endif
XNNPACK Teamb455b122019-09-27 18:10:33 -07001128 pthread_once(&init_guard, &init);
1129 if (xnn_params.initialized) {
1130 return xnn_status_success;
1131 } else {
1132 return xnn_status_unsupported_hardware;
1133 }
1134}
1135
1136enum xnn_status xnn_deinitialize(void) {
Marat Dukhand343c222019-10-07 09:22:14 -07001137 #ifndef __EMSCRIPTEN__
1138 cpuinfo_deinitialize();
1139 #endif
XNNPACK Teamb455b122019-09-27 18:10:33 -07001140 return xnn_status_success;
1141}