blob: bec4f391e7c4657f166198e967fcc947a2e22467 [file] [log] [blame]
XNNPACK Teamb455b122019-09-27 18:10:33 -07001// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2019 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8
9#include <stdbool.h>
10#include <stddef.h>
11#include <stdint.h>
12
13#include <pthread.h>
14
Marat Dukhand343c222019-10-07 09:22:14 -070015#ifndef __EMSCRIPTEN__
16 #include <cpuinfo.h>
17#endif
XNNPACK Teamb455b122019-09-27 18:10:33 -070018
19#include <xnnpack.h>
20#include <xnnpack/argmaxpool.h>
21#include <xnnpack/avgpool.h>
Marat Dukhan69722492019-11-11 19:55:50 -080022#include <xnnpack/bilinear.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070023#include <xnnpack/clamp.h>
Marat Dukhan1dadbf72019-10-01 10:46:20 -070024#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070025#include <xnnpack/conv.h>
26#include <xnnpack/dwconv.h>
27#include <xnnpack/gavgpool.h>
28#include <xnnpack/gemm.h>
29#include <xnnpack/hswish.h>
30#include <xnnpack/igemm.h>
31#include <xnnpack/log.h>
32#include <xnnpack/lut.h>
33#include <xnnpack/maxpool.h>
34#include <xnnpack/pad.h>
35#include <xnnpack/params.h>
36#include <xnnpack/pavgpool.h>
37#include <xnnpack/prelu.h>
38#include <xnnpack/rmax.h>
39#include <xnnpack/spmm.h>
40#include <xnnpack/unpool.h>
41#include <xnnpack/vadd.h>
Marat Dukhanc07cb7f2019-11-14 15:32:05 -080042#include <xnnpack/vbinop.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070043#include <xnnpack/vmulcaddc.h>
Marat Dukhan346a9e52019-11-15 09:06:30 -080044#include <xnnpack/vunop.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070045#include <xnnpack/zip.h>
46
// Default for XNN_ENABLE_ASSEMBLY when the build does not define it: 1.
// init() below tests this flag to choose between hand-written assembly
// micro-kernels (the *__aarch32_* / *__aarch64_* symbols) and the
// intrinsics-based variants.
47#ifndef XNN_ENABLE_ASSEMBLY
48 #define XNN_ENABLE_ASSEMBLY 1
49#endif
50
// One-time initialization control object, statically set to PTHREAD_ONCE_INIT.
// NOTE(review): presumably handed to pthread_once() together with init() so the
// parameter table is populated exactly once -- the call site is outside this
// chunk; confirm.
51static pthread_once_t init_guard = PTHREAD_ONCE_INIT;
52
// Global (non-static) parameter table. It starts with .initialized = false;
// init() below assigns the per-datatype members (q8, u8, x8, f32, x32) with
// micro-kernel function pointers and their tile sizes for the target
// architecture.
53struct xnn_parameters xnn_params = {
54 .initialized = false
55};
56
// Externally defined stub routines, declared only for the PNaCl / asm.js /
// WebAssembly targets selected by the XNN_ARCH_* conditions. Each takes and
// returns 32-bit unsigned integers.
// NOTE(review): the names suggest f32 subtract/min on raw bit patterns, and
// they are presumably used later in init() for those targets -- neither the
// definitions nor any call is visible in this chunk; confirm.
Marat Dukhan1dadbf72019-10-01 10:46:20 -070057#if XNN_ARCH_PNACL || XNN_ARCH_ASMJS || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
XNNPACK Teamb455b122019-09-27 18:10:33 -070058 extern uint32_t xnn_stub_wasm_f32_sub(uint32_t a, uint32_t b);
59#endif
// The min stub is not declared for asm.js (XNN_ARCH_ASMJS is absent from this condition).
Marat Dukhan1dadbf72019-10-01 10:46:20 -070060#if XNN_ARCH_PNACL || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
XNNPACK Teamb455b122019-09-27 18:10:33 -070061 extern uint32_t xnn_stub_wasm_f32_min(uint32_t a, uint32_t b);
62#endif
63
64static void init(void) {
Marat Dukhan1dadbf72019-10-01 10:46:20 -070065#if XNN_ARCH_ARM
XNNPACK Teamb455b122019-09-27 18:10:33 -070066 if (!cpuinfo_has_arm_neon()) {
67 xnn_log_error("XNNPACK initialization failed: NEON is not supported");
68 return;
69 }
70
71 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -070072 #ifndef XNN_NO_Q8_OPERATORS
73 xnn_params.q8.gemm = (struct gemm_parameters) {
74 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x8__neon,
75 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x8__neon,
76 .mr = 4,
77 .nr = 8,
78 };
XNNPACK Teamb455b122019-09-27 18:10:33 -070079
Marat Dukhan8fe54e42019-10-10 14:12:59 -070080 #if XNN_ENABLE_ASSEMBLY
81 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
82 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__aarch32_neon,
83 .cr = 8,
84 .mr = 9,
85 };
86 #else
87 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
88 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
89 .cr = 8,
90 .mr = 9,
91 };
92 #endif
93 xnn_params.q8.avgpool = (struct avgpool_parameters) {
94 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
95 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
96 .mr = 9,
97 .qr = 8,
98 };
99 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
100 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
101 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
102 .mr = 7,
103 };
104 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
105 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700106
107 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700108 #ifndef XNN_NO_U8_OPERATORS
109 xnn_params.u8.maxpool = (struct maxpool_parameters) {
110 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
111 .mr = 9,
112 .qr = 8,
113 };
114 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
115 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
116 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
117 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700118
119 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700120 #ifndef XNN_NO_X8_OPERATORS
121 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
122 xnn_params.x8.zip = (struct zip_parameters) {
123 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
124 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
125 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
126 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
127 };
128 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700129
130 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700131 #ifndef XNN_NO_F32_OPERATORS
132 xnn_params.f32.gemm = (struct gemm_parameters) {
133 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neon_ld128,
134 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neon_ld128,
135 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_ld64,
136 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_ld64,
137 .mr = 4,
138 .nr = 8,
139 };
140 xnn_params.f32.gemm2 = (struct gemm_parameters) {
141 .gemm = NULL,
142 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neon_ld64,
143 .mr = 4,
144 .nr = 2,
145 };
146 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
147 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
148 .cr = 4,
149 .mr = 4,
150 };
151 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
152 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neon,
153 .cr = 4,
154 .mr = 9,
155 };
156 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
157 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
158 .cr = 4,
159 .mr = 25,
160 };
161 xnn_params.f32.avgpool = (struct avgpool_parameters) {
162 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
163 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
164 .mr = 9,
165 .qr = 8,
166 };
167 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
168 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
169 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
170 .mr = 9,
171 .qr = 8,
172 };
173 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
174 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
175 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
176 .mr = 7,
177 };
178 xnn_params.f32.maxpool = (struct maxpool_parameters) {
179 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
180 .mr = 9,
181 .qr = 8,
182 };
183 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
184 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
185 .mr = 4,
186 };
187 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
188 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
189 .mr = 9,
190 };
191 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
192 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
193 .mr = 9,
194 .qr = 8,
195 };
Marat Dukhan69722492019-11-11 19:55:50 -0800196 xnn_params.f32.bilinear = (struct bilinear_parameters) {
197 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__neon_c8,
198 .pixel_tile = 1,
199 .channel_tile = 8,
200 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700201 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
202 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neon;
203 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800204 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__neon_2x8,
205 .row_tile = 2,
206 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700207 };
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800208 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__neon_x8;
Marat Dukhanca2733c2019-11-15 23:21:17 -0800209 xnn_params.f32.vmul = (struct vbinop_parameters) {
210 .op_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmul_ukernel__neon_x8,
211 .opc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
212 .ropc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
213 .element_tile = 8,
214 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700215 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -0800216 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neon_2x,
217 .channel_tile = 4,
218 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700219 };
220 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700221
222 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700223 #ifndef XNN_NO_X32_OPERATORS
224 xnn_params.x32.pad = (struct pad_parameters) {
225 .ukernel = xnn_x32_pad_x2__neon,
226 .mr = 2,
227 };
228 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
229 xnn_params.x32.zip = (struct zip_parameters) {
230 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
231 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
232 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
233 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
234 };
235 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700236
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700237#elif XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700238
239 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700240 #ifndef XNN_NO_Q8_OPERATORS
241 xnn_params.q8.gemm = (struct gemm_parameters) {
242 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_8x8__neon,
243 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_8x8__neon,
244 .mr = 8,
245 .nr = 8,
246 };
247 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
248 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
249 .cr = 8,
250 .mr = 9,
251 };
252 xnn_params.q8.avgpool = (struct avgpool_parameters) {
253 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
254 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
255 .mr = 9,
256 .qr = 8,
257 };
258 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
259 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
260 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
261 .mr = 7,
262 };
263 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
264 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700265
266 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700267 #ifndef XNN_NO_U8_OPERATORS
268 xnn_params.u8.maxpool = (struct maxpool_parameters) {
269 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
270 .mr = 9,
271 .qr = 8,
272 };
273 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
274 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
275 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
276 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700277
278 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700279 #ifndef XNN_NO_X8_OPERATORS
280 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
281 xnn_params.x8.zip = (struct zip_parameters) {
282 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
283 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
284 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
285 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
286 };
287 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700288
289 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700290 #ifndef XNN_NO_F32_OPERATORS
291 #if XNN_ENABLE_ASSEMBLY
292 switch (cpuinfo_get_core(0)->uarch) {
293 case cpuinfo_uarch_kryo:
294 xnn_params.f32.gemm = (struct gemm_parameters) {
295 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a57,
296 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
297 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
298 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
299 .mr = 4,
300 .nr = 8,
301 };
302 break;
303 case cpuinfo_uarch_cortex_a57:
304 xnn_params.f32.gemm = (struct gemm_parameters) {
305 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
306 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
307 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
308 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
309 .mr = 6,
310 .nr = 8,
311 };
312 break;
313 case cpuinfo_uarch_cortex_a72:
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700314 xnn_params.f32.gemm = (struct gemm_parameters) {
315 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
316 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
317 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
318 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
319 .mr = 4,
320 .nr = 8,
321 };
322 break;
323 case cpuinfo_uarch_cortex_a75:
Frank Barchard263bb092019-10-28 15:28:46 -0700324 case cpuinfo_uarch_cortex_a76:
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700325 case cpuinfo_uarch_mongoose_m1:
326 case cpuinfo_uarch_mongoose_m2:
327 case cpuinfo_uarch_meerkat_m3:
328 case (cpuinfo_uarch_meerkat_m3 + 1):
329 xnn_params.f32.gemm = (struct gemm_parameters) {
330 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
331 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
332 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
333 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
334 .mr = 6,
335 .nr = 8,
336 };
337 break;
338 case cpuinfo_uarch_cortex_a53:
339 case cpuinfo_uarch_cortex_a55:
340 xnn_params.f32.gemm = (struct gemm_parameters) {
Frank Barchardbd1d5d92019-10-30 15:53:30 -0700341 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a53,
342 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a53,
343 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a53,
344 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a53,
345 .mr = 6,
346 .nr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700347 };
348 break;
349 case cpuinfo_uarch_cortex_a73:
350 xnn_params.f32.gemm = (struct gemm_parameters) {
351 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
352 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
353 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
354 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
355 .mr = 6,
356 .nr = 8,
357 };
358 break;
359 default:
360 xnn_params.f32.gemm = (struct gemm_parameters) {
Frank Barchard2af471b2019-10-16 19:10:32 -0700361 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__neonfma_ld64,
362 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__neonfma_ld64,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700363 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
364 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
Frank Barchard2af471b2019-10-16 19:10:32 -0700365 .mr = 6,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700366 .nr = 8,
367 };
368 break;
369 }
370 #else // XNN_ENABLE_ASSEMBLY
XNNPACK Teamb455b122019-09-27 18:10:33 -0700371 xnn_params.f32.gemm = (struct gemm_parameters) {
Frank Barchard2af471b2019-10-16 19:10:32 -0700372 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__neonfma_ld64,
373 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__neonfma_ld64,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700374 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neonfma_ld64,
375 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neonfma_ld64,
Frank Barchard2af471b2019-10-16 19:10:32 -0700376 .mr = 6,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700377 .nr = 8,
378 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700379 #endif
XNNPACK Teamb455b122019-09-27 18:10:33 -0700380
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700381 xnn_params.f32.gemm2 = (struct gemm_parameters) {
382 .gemm = NULL,
383 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neonfma_ld64,
384 .mr = 4,
385 .nr = 2,
386 };
387 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
388 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
389 .cr = 4,
390 .mr = 4,
391 };
392 switch (cpuinfo_get_core(0)->uarch) {
393 case cpuinfo_uarch_kryo:
394 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
395 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neonfma,
396 .cr = 4,
397 .mr = 9,
398 };
399 break;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700400#if XNN_ENABLE_ASSEMBLY
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700401 case cpuinfo_uarch_cortex_a53:
402 case cpuinfo_uarch_cortex_a55:
403 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
404 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55,
405 .cr = 4,
406 .mr = 9,
407 };
408 break;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700409#endif
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700410 default:
411 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
412 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__neonfma,
413 .cr = 8,
414 .mr = 9,
415 };
416 break;
417 }
418 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
419 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
420 .cr = 4,
421 .mr = 25,
422 };
423 xnn_params.f32.avgpool = (struct avgpool_parameters) {
424 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
425 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
426 .mr = 9,
427 .qr = 8,
428 };
429 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
430 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
431 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
432 .mr = 9,
433 .qr = 8,
434 };
435 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
436 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
437 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
438 .mr = 7,
439 };
440 xnn_params.f32.maxpool = (struct maxpool_parameters) {
441 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
442 .mr = 9,
443 .qr = 8,
444 };
445 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
446 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
447 .mr = 4,
448 };
449 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
450 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
451 .mr = 9,
452 };
453 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
454 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
455 .mr = 9,
456 .qr = 8,
457 };
Marat Dukhan69722492019-11-11 19:55:50 -0800458 xnn_params.f32.bilinear = (struct bilinear_parameters) {
459 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__neonfma_c8,
460 .pixel_tile = 1,
461 .channel_tile = 8,
462 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700463 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
464 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neonfma;
Marat Dukhan346a9e52019-11-15 09:06:30 -0800465 xnn_params.f32.sigmoid = (xnn_univector_ukernel_function) xnn_f32_sigmoid_ukernel__neonfma_p5_x16;
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700466 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800467 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__neon_2x8,
468 .row_tile = 2,
469 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700470 };
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800471 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__neon_x8;
Marat Dukhanca2733c2019-11-15 23:21:17 -0800472 xnn_params.f32.vmul = (struct vbinop_parameters) {
473 .op_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmul_ukernel__neon_x8,
474 .opc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
475 .ropc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
476 .element_tile = 8,
477 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700478 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -0800479 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neonfma_2x,
480 .channel_tile = 4,
481 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700482 };
Marat Dukhanefc47b82019-11-18 09:25:38 -0800483 #ifndef XNN_NO_NCHW_OPERATORS
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700484 xnn_params.f32.spmm = (struct spmm_parameters) {
Erich Elsen9cdade32019-10-16 05:26:59 -0700485 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x1__neonfma_pipelined,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700486 .mr = 16,
487 .nr = 1,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700488 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700489 xnn_params.f32.spmm2 = (struct spmm_parameters) {
490 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x2__neonfma,
491 .mr = 16,
492 .nr = 2,
493 };
494 xnn_params.f32.spmm4 = (struct spmm_parameters) {
495 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x4__neonfma,
496 .mr = 16,
497 .nr = 4,
498 };
499 xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
500 .ukernel_with_symm_padding =
501 (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2,
502 .output_channel_tile = 4,
503 .output_height_tile = 2,
504 .output_width_tile = 2,
505 };
506 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
507 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma,
508 .input_width_tile = 4,
509 .output_width_tile = 4,
510 .output_height_tile = 3,
511 };
512 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
513 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma,
514 .input_width_tile = 4,
515 .output_width_tile = 4,
516 .output_height_tile = 1,
517 };
Marat Dukhana99918a2019-11-15 14:40:12 -0800518 xnn_params.f32.spchw_dwconv5x5 = (struct spchw_dwconv_parameters) {
519 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma,
520 .input_width_tile = 4,
521 .output_width_tile = 4,
Erich Elsende9b0fb2019-11-15 18:19:03 -0800522 .output_height_tile = 2,
Marat Dukhana99918a2019-11-15 14:40:12 -0800523 };
524 xnn_params.f32.spchw_dwconv5x5s2 = (struct spchw_dwconv_parameters) {
525 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma,
526 .input_width_tile = 4,
527 .output_width_tile = 4,
528 .output_height_tile = 1,
529 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700530 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
531 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__neon_x4,
532 .channel_tile = 4,
533 };
Marat Dukhanefc47b82019-11-18 09:25:38 -0800534 #endif // XNN_NO_NCHW_OPERATORS
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700535 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700536
537 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700538 #ifndef XNN_NO_X32_OPERATORS
539 xnn_params.x32.pad = (struct pad_parameters) {
540 .ukernel = xnn_x32_pad_x2__neon,
541 .mr = 2,
542 };
543 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
544 xnn_params.x32.zip = (struct zip_parameters) {
545 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
546 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
547 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
548 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
549 };
550 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700551
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700552#elif XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700553 if (!cpuinfo_has_x86_sse2()) {
554 xnn_log_error("XNNPACK initialization failed: SSE2 is not supported");
555 return;
556 }
557
558 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700559 #ifndef XNN_NO_Q8_OPERATORS
560 xnn_params.q8.gemm = (struct gemm_parameters) {
561 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x4c2__sse2,
562 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x4c2__sse2,
563 .mr = 4,
564 .nr = 4,
565 .log2_kr = 1,
566 };
567 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
568 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__sse2,
569 .cr = 8,
570 .mr = 9,
571 };
572 xnn_params.q8.avgpool = (struct avgpool_parameters) {
573 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__sse2,
574 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__sse2,
575 .mr = 9,
576 .qr = 8,
577 };
578 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
579 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__sse2,
580 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__sse2,
581 .mr = 7,
582 };
583 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__sse2;
584 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700585
586 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700587 #ifndef XNN_NO_U8_OPERATORS
588 xnn_params.u8.maxpool = (struct maxpool_parameters) {
589 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__sse2,
590 .mr = 9,
591 .qr = 8,
592 };
593 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__sse2;
594 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
595 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__sse2;
596 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700597
598 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700599 #ifndef XNN_NO_X8_OPERATORS
600 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
601 xnn_params.x8.zip = (struct zip_parameters) {
602 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__sse2,
603 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__sse2,
604 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__sse2,
605 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__sse2,
606 };
607 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700608
609 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700610 #ifndef XNN_NO_F32_OPERATORS
611 xnn_params.f32.gemm = (struct gemm_parameters) {
612 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__sse_load1,
613 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__sse_load1,
614 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__sse_load1,
615 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__sse_load1,
616 .mr = 4,
617 .nr = 8,
618 };
619 xnn_params.f32.gemm2 = (struct gemm_parameters) {
620 .gemm = NULL,
621 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__sse,
622 .mr = 4,
623 .nr = 2,
624 .log2_kr = 2,
625 };
626 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800627 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x4__sse,
628 .cr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700629 .mr = 4,
630 };
631 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800632 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__sse,
633 .cr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700634 .mr = 9,
635 };
636 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800637 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x25__sse,
638 .cr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700639 .mr = 25,
640 };
641 xnn_params.f32.avgpool = (struct avgpool_parameters) {
642 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__sse,
643 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__sse,
644 .mr = 9,
645 .qr = 8,
646 };
647 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
648 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__sse,
649 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__sse,
650 .mr = 9,
651 .qr = 8,
652 };
653 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
654 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__sse,
655 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__sse,
656 .mr = 7,
657 };
658 xnn_params.f32.maxpool = (struct maxpool_parameters) {
659 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__sse,
660 .mr = 9,
661 .qr = 8,
662 };
663 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
664 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__sse2,
665 .mr = 4,
666 };
667 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
668 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__sse2,
669 .mr = 9,
670 };
671 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
672 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__sse2,
673 .mr = 9,
674 .qr = 8,
675 };
Marat Dukhan69722492019-11-11 19:55:50 -0800676 xnn_params.f32.bilinear = (struct bilinear_parameters) {
677 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__sse_c8,
678 .pixel_tile = 1,
679 .channel_tile = 8,
680 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700681 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__sse;
682 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__sse;
683 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800684 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__sse2_2x8,
685 .row_tile = 2,
686 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700687 };
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800688 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__sse_x8;
Marat Dukhanca2733c2019-11-15 23:21:17 -0800689 xnn_params.f32.vmul = (struct vbinop_parameters) {
690 .op_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmul_ukernel__sse_x8,
691 .opc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__sse_x8,
692 .ropc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__sse_x8,
693 .element_tile = 8,
694 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700695 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -0800696 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__sse_2x,
697 .channel_tile = 4,
698 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700699 };
Marat Dukhanefc47b82019-11-18 09:25:38 -0800700 #ifndef XNN_NO_NCHW_OPERATORS
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700701 xnn_params.f32.spmm = (struct spmm_parameters) {
702 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__sse,
703 .mr = 4,
704 .nr = 1,
705 };
706 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
707 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__sse,
708 .input_width_tile = 4,
709 .output_width_tile = 4,
710 .output_height_tile = 1,
711 };
712 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
713 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse,
714 .input_width_tile = 4,
715 .output_width_tile = 4,
716 .output_height_tile = 1,
717 };
718 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
719 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__sse_x4,
720 .channel_tile = 4,
721 };
Marat Dukhanefc47b82019-11-18 09:25:38 -0800722 #endif // XNN_NO_NCHW_OPERATORS
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700723 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700724
725 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700726 #ifndef XNN_NO_X32_OPERATORS
727 xnn_params.x32.pad = (struct pad_parameters) {
728 .ukernel = xnn_x32_pad_x2__sse2,
729 .mr = 2,
730 };
731 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
732 xnn_params.x32.zip = (struct zip_parameters) {
733 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__sse2,
734 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__sse2,
735 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__sse2,
736 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__sse2,
737 };
738 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700739
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700740#elif XNN_ARCH_PNACL || XNN_ARCH_WASMSIMD
Marat Dukhan466b5232019-10-09 11:22:20 -0700741 // Unlike most other architectures, on x86/x86-64 when floating-point instructions
742 // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
743 // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
744 // of two infinities (must produce NaN per IEEE 754 standard).
745 static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
746 const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;
747
XNNPACK Teamb455b122019-09-27 18:10:33 -0700748 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700749 #ifndef XNN_NO_Q8_OPERATORS
750 xnn_params.q8.gemm = (struct gemm_parameters) {
751 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
752 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
753 .mr = 2,
754 .nr = 2,
755 };
756 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
757 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
758 .cr = 1,
759 .mr = 9,
760 };
761 xnn_params.q8.avgpool = (struct avgpool_parameters) {
762 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
763 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
764 .mr = 9,
765 .qr = 8,
766 };
767 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
768 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
769 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
770 .mr = 7,
771 };
772 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
773 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700774
775 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700776 #ifndef XNN_NO_U8_OPERATORS
777 xnn_params.u8.maxpool = (struct maxpool_parameters) {
778 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
779 .mr = 9,
780 .qr = 8,
781 };
782 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
783 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
784 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
785 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700786
787 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700788 #ifndef XNN_NO_X8_OPERATORS
789 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
790 xnn_params.x8.zip = (struct zip_parameters) {
791 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
792 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
793 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
794 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
795 };
796 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700797
798 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700799 #ifndef XNN_NO_F32_OPERATORS
800 if (is_wasm_x86) {
801 xnn_params.f32.gemm = (struct gemm_parameters) {
Marat Dukhancb801972019-10-23 02:10:33 -0700802 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__psimd_splat,
803 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__psimd_splat,
804 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__psimd_splat,
805 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__psimd_splat,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700806 .mr = 4,
807 .nr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700808 };
809 } else {
810 xnn_params.f32.gemm = (struct gemm_parameters) {
Marat Dukhancd945c62019-10-25 11:59:50 -0700811 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8s4__psimd,
812 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8s4__psimd,
813 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__psimd,
814 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__psimd,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700815 .mr = 6,
816 .nr = 8,
Marat Dukhancd945c62019-10-25 11:59:50 -0700817 .log2_sr = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700818 };
819 }
820 xnn_params.f32.gemm2 = (struct gemm_parameters) {
821 .gemm = NULL,
822 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__psimd,
Marat Dukhan466b5232019-10-09 11:22:20 -0700823 .mr = 4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700824 .nr = 2,
825 .log2_kr = 2,
Marat Dukhan466b5232019-10-09 11:22:20 -0700826 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700827 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800828 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700829 .cr = 4,
830 .mr = 4,
Marat Dukhan466b5232019-10-09 11:22:20 -0700831 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700832 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800833 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__psimd_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700834 .cr = 4,
835 .mr = 9,
836 };
837 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800838 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700839 .cr = 4,
840 .mr = 25,
841 };
842 xnn_params.f32.avgpool = (struct avgpool_parameters) {
843 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__psimd,
844 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__psimd,
845 .mr = 9,
846 .qr = 8,
847 };
848 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
849 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__psimd,
850 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__psimd,
851 .mr = 9,
852 .qr = 8,
853 };
854 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
855 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__psimd,
856 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__psimd,
857 .mr = 7,
858 };
859 xnn_params.f32.maxpool = (struct maxpool_parameters) {
860 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
861 .mr = 9,
862 .qr = 8,
863 };
864 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
865 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
866 .mr = 4,
867 };
868 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
869 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
870 .mr = 9,
871 };
872 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
873 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
874 .mr = 9,
875 .qr = 8,
876 };
Marat Dukhan69722492019-11-11 19:55:50 -0800877 xnn_params.f32.bilinear = (struct bilinear_parameters) {
878 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__psimd_c8,
879 .pixel_tile = 1,
880 .channel_tile = 8,
881 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700882 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__psimd;
883 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__psimd;
884 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800885 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__psimd_2x8,
886 .row_tile = 2,
887 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700888 };
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800889 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd_x8;
Marat Dukhanca2733c2019-11-15 23:21:17 -0800890 xnn_params.f32.vmul = (struct vbinop_parameters) {
891 .op_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmul_ukernel__psimd_x8,
892 .opc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__psimd_x8,
893 .ropc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__psimd_x8,
894 .element_tile = 8,
895 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700896 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -0800897 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__psimd_2x,
898 .channel_tile = 4,
899 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700900 };
901 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700902
903 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700904 #ifndef XNN_NO_X32_OPERATORS
905 xnn_params.x32.pad = (struct pad_parameters) {
906 .ukernel = xnn_x32_pad_x2__psimd,
907 .mr = 2,
908 };
909 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
910 xnn_params.x32.zip = (struct zip_parameters) {
911 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__psimd,
912 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__psimd,
913 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__psimd,
914 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__psimd,
915 };
916 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700917
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700918#elif XNN_ARCH_WASM || XNN_ARCH_ASMJS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700919 // Unlike most other architectures, on x86/x86-64 when floating-point instructions
920 // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
921 // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
922 // of two infinities (must produce NaN per IEEE 754 standard).
923 static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
924 const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;
925
926 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700927 #ifndef XNN_NO_Q8_OPERATORS
928 xnn_params.q8.gemm = (struct gemm_parameters) {
929 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
930 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
931 .mr = 2,
932 .nr = 2,
933 };
934 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
935 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
936 .cr = 1,
937 .mr = 9,
938 };
939 xnn_params.q8.avgpool = (struct avgpool_parameters) {
940 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
941 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
942 .mr = 9,
943 .qr = 8,
944 };
945 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
946 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
947 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
948 .mr = 7,
949 };
950 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
951 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700952
953 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700954 #ifndef XNN_NO_U8_OPERATORS
955 xnn_params.u8.maxpool = (struct maxpool_parameters) {
956 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
957 .mr = 9,
958 .qr = 8,
959 };
960 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
961 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
962 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
963 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700964
965 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700966 #ifndef XNN_NO_X8_OPERATORS
967 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
968 xnn_params.x8.zip = (struct zip_parameters) {
969 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
970 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
971 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
972 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
973 };
974 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700975
976 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700977 #ifndef XNN_NO_F32_OPERATORS
978 if (is_wasm_x86) {
979 xnn_params.f32.gemm = (struct gemm_parameters) {
980 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_2x4__scalar,
981 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_2x4__scalar,
982 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
983 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
984 .mr = 2,
985 .nr = 4,
986 };
987 } else {
988 xnn_params.f32.gemm = (struct gemm_parameters) {
989 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x4__scalar,
990 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x4__scalar,
991 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
992 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
993 .mr = 4,
994 .nr = 4,
995 };
996 }
997 xnn_params.f32.gemm2 = (struct gemm_parameters) {
998 .gemm = NULL,
999 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__scalar,
XNNPACK Teamb455b122019-09-27 18:10:33 -07001000 .mr = 4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001001 .nr = 2,
XNNPACK Teamb455b122019-09-27 18:10:33 -07001002 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001003 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -08001004 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x4__scalar_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001005 .cr = 1,
1006 .mr = 4,
1007 };
1008 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -08001009 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x9__scalar_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001010 .cr = 1,
1011 .mr = 9,
1012 };
1013 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -08001014 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x25__scalar_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001015 .cr = 1,
1016 .mr = 25,
1017 };
1018 xnn_params.f32.avgpool = (struct avgpool_parameters) {
1019 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__scalar,
1020 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__scalar,
1021 .mr = 9,
1022 .qr = 8,
1023 };
1024 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
1025 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__scalar,
1026 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__scalar,
1027 .mr = 9,
1028 .qr = 8,
1029 };
1030 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
1031 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__scalar,
1032 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__scalar,
1033 .mr = 7,
1034 };
1035 xnn_params.f32.maxpool = (struct maxpool_parameters) {
1036 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__scalar,
1037 .mr = 9,
1038 .qr = 8,
1039 };
1040 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
1041 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__scalar,
1042 .mr = 4,
1043 };
1044 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
1045 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__scalar,
1046 .mr = 9,
1047 };
1048 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
1049 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__scalar,
1050 .mr = 9,
1051 .qr = 8,
1052 };
Marat Dukhan69722492019-11-11 19:55:50 -08001053 xnn_params.f32.bilinear = (struct bilinear_parameters) {
1054 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__scalar_c2,
1055 .pixel_tile = 1,
1056 .channel_tile = 2,
1057 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001058 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__scalar;
1059 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__scalar;
1060 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001061 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__scalar_2x4,
1062 .row_tile = 4,
1063 .channel_tile = 4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001064 };
Marat Dukhanc07cb7f2019-11-14 15:32:05 -08001065 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__scalar_x4;
Marat Dukhanca2733c2019-11-15 23:21:17 -08001066 xnn_params.f32.vmul = (struct vbinop_parameters) {
1067 .op_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmul_ukernel__scalar_x4,
1068 .opc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__scalar_x4,
1069 .ropc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__scalar_x4,
1070 .element_tile = 8,
1071 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001072 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -08001073 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c1__scalar_2x,
1074 .channel_tile = 1,
1075 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001076 };
Marat Dukhanefc47b82019-11-18 09:25:38 -08001077 #ifndef XNN_NO_NCHW_OPERATORS
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001078 xnn_params.f32.spmm = (struct spmm_parameters) {
Marat Dukhanbff791e2019-10-24 11:05:37 -07001079 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x1__scalar,
1080 .mr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001081 .nr = 1,
1082 };
Erich Elsenc6afd9b2019-10-24 16:10:53 -07001083 xnn_params.f32.spmm2 = (struct spmm_parameters) {
1084 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x2__scalar,
1085 .mr = 8,
1086 .nr = 2,
1087 };
1088 xnn_params.f32.spmm4 = (struct spmm_parameters) {
1089 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x4__scalar,
1090 .mr = 8,
1091 .nr = 4,
1092 };
Marat Dukhan14fe0b22019-10-23 21:20:07 -07001093 xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
1094 .ukernel_with_symm_padding =
1095 (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1,
1096 .output_channel_tile = 4,
1097 .output_height_tile = 1,
1098 .output_width_tile = 1,
1099 };
1100 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
1101 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar,
1102 .input_width_tile = 1,
1103 .output_width_tile = 1,
1104 .output_height_tile = 1,
1105 };
1106 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
1107 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar,
1108 .input_width_tile = 1,
1109 .output_width_tile = 1,
1110 .output_height_tile = 1,
1111 };
Marat Dukhana99918a2019-11-15 14:40:12 -08001112 xnn_params.f32.spchw_dwconv5x5 = (struct spchw_dwconv_parameters) {
1113 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar,
1114 .input_width_tile = 1,
1115 .output_width_tile = 1,
1116 .output_height_tile = 1,
1117 };
1118 xnn_params.f32.spchw_dwconv5x5s2 = (struct spchw_dwconv_parameters) {
1119 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar,
1120 .input_width_tile = 1,
1121 .output_width_tile = 1,
1122 .output_height_tile = 1,
1123 };
Marat Dukhan14fe0b22019-10-23 21:20:07 -07001124 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
1125 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__scalar_x1,
1126 .channel_tile = 1,
1127 };
Marat Dukhanefc47b82019-11-18 09:25:38 -08001128 #endif // XNN_NO_NCHW_OPERATORS
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001129 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001130
1131 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001132 #ifndef XNN_NO_X32_OPERATORS
1133 xnn_params.x32.pad = (struct pad_parameters) {
1134 .ukernel = xnn_x32_pad_x2__scalar,
1135 .mr = 2,
1136 };
1137 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__scalar;
1138 xnn_params.x32.zip = (struct zip_parameters) {
1139 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__scalar,
1140 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__scalar,
1141 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__scalar,
1142 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__scalar,
1143 };
1144 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001145
1146#else
1147 #error "Unsupported architecture"
1148#endif
1149 xnn_params.initialized = true;
1150}
1151
1152enum xnn_status xnn_initialize(void) {
Marat Dukhand343c222019-10-07 09:22:14 -07001153 #ifndef __EMSCRIPTEN__
1154 if (!cpuinfo_initialize()) {
1155 return xnn_status_out_of_memory;
1156 }
1157 #endif
XNNPACK Teamb455b122019-09-27 18:10:33 -07001158 pthread_once(&init_guard, &init);
1159 if (xnn_params.initialized) {
1160 return xnn_status_success;
1161 } else {
1162 return xnn_status_unsupported_hardware;
1163 }
1164}
1165
1166enum xnn_status xnn_deinitialize(void) {
Marat Dukhand343c222019-10-07 09:22:14 -07001167 #ifndef __EMSCRIPTEN__
1168 cpuinfo_deinitialize();
1169 #endif
XNNPACK Teamb455b122019-09-27 18:10:33 -07001170 return xnn_status_success;
1171}