// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include <pthread.h>

#ifndef __EMSCRIPTEN__
  #include <cpuinfo.h>
#endif

#include <xnnpack.h>
#include <xnnpack/argmaxpool.h>
#include <xnnpack/avgpool.h>
#include <xnnpack/bilinear.h>
#include <xnnpack/clamp.h>
#include <xnnpack/common.h>
#include <xnnpack/conv.h>
#include <xnnpack/dwconv.h>
#include <xnnpack/gavgpool.h>
#include <xnnpack/gemm.h>
#include <xnnpack/hswish.h>
#include <xnnpack/igemm.h>
#include <xnnpack/log.h>
#include <xnnpack/lut.h>
#include <xnnpack/maxpool.h>
#include <xnnpack/pad.h>
#include <xnnpack/params.h>
#include <xnnpack/pavgpool.h>
#include <xnnpack/prelu.h>
#include <xnnpack/rmax.h>
#include <xnnpack/spmm.h>
#include <xnnpack/unpool.h>
#include <xnnpack/vadd.h>
#include <xnnpack/vbinop.h>
#include <xnnpack/vmulcaddc.h>
#include <xnnpack/vunop.h>
#include <xnnpack/zip.h>

#ifndef XNN_ENABLE_ASSEMBLY
  #define XNN_ENABLE_ASSEMBLY 1
#endif

static pthread_once_t init_guard = PTHREAD_ONCE_INIT;

struct xnn_parameters xnn_params = {
  .initialized = false
};
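// Note on how the table above gets filled in: xnn_initialize() runs init() through
// pthread_once(&init_guard, ...), so the micro-kernel tables are populated exactly
// once even with concurrent callers, and init() sets .initialized to true only after
// the tables for the detected architecture have been written.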
56
Marat Dukhan1dadbf72019-10-01 10:46:20 -070057#if XNN_ARCH_PNACL || XNN_ARCH_ASMJS || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
XNNPACK Teamb455b122019-09-27 18:10:33 -070058 extern uint32_t xnn_stub_wasm_f32_sub(uint32_t a, uint32_t b);
59#endif
Marat Dukhan1dadbf72019-10-01 10:46:20 -070060#if XNN_ARCH_PNACL || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
XNNPACK Teamb455b122019-09-27 18:10:33 -070061 extern uint32_t xnn_stub_wasm_f32_min(uint32_t a, uint32_t b);
62#endif
63
64static void init(void) {
Marat Dukhan1dadbf72019-10-01 10:46:20 -070065#if XNN_ARCH_ARM
XNNPACK Teamb455b122019-09-27 18:10:33 -070066 if (!cpuinfo_has_arm_neon()) {
67 xnn_log_error("XNNPACK initialization failed: NEON is not supported");
68 return;
69 }
70
71 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -070072 #ifndef XNN_NO_Q8_OPERATORS
73 xnn_params.q8.gemm = (struct gemm_parameters) {
74 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x8__neon,
75 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x8__neon,
76 .mr = 4,
77 .nr = 8,
78 };
XNNPACK Teamb455b122019-09-27 18:10:33 -070079
Marat Dukhan8fe54e42019-10-10 14:12:59 -070080 #if XNN_ENABLE_ASSEMBLY
81 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
82 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__aarch32_neon,
83 .cr = 8,
84 .mr = 9,
85 };
86 #else
87 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
88 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
89 .cr = 8,
90 .mr = 9,
91 };
92 #endif
93 xnn_params.q8.avgpool = (struct avgpool_parameters) {
94 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
95 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
96 .mr = 9,
97 .qr = 8,
98 };
99 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
100 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
101 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
102 .mr = 7,
103 };
104 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
105 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700106
107 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700108 #ifndef XNN_NO_U8_OPERATORS
109 xnn_params.u8.maxpool = (struct maxpool_parameters) {
110 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
111 .mr = 9,
112 .qr = 8,
113 };
114 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
115 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
116 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
117 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700118
119 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700120 #ifndef XNN_NO_X8_OPERATORS
121 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
122 xnn_params.x8.zip = (struct zip_parameters) {
123 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
124 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
125 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
126 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
127 };
128 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700129
130 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700131 #ifndef XNN_NO_F32_OPERATORS
132 xnn_params.f32.gemm = (struct gemm_parameters) {
133 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neon_ld128,
134 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neon_ld128,
135 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_ld64,
136 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_ld64,
137 .mr = 4,
138 .nr = 8,
139 };
140 xnn_params.f32.gemm2 = (struct gemm_parameters) {
141 .gemm = NULL,
142 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neon_ld64,
143 .mr = 4,
144 .nr = 2,
145 };
146 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
147 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
148 .cr = 4,
149 .mr = 4,
150 };
151 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
152 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neon,
153 .cr = 4,
154 .mr = 9,
155 };
156 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
157 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
158 .cr = 4,
159 .mr = 25,
160 };
161 xnn_params.f32.avgpool = (struct avgpool_parameters) {
162 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
163 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
164 .mr = 9,
165 .qr = 8,
166 };
167 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
168 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
169 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
170 .mr = 9,
171 .qr = 8,
172 };
173 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
174 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
175 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
176 .mr = 7,
177 };
178 xnn_params.f32.maxpool = (struct maxpool_parameters) {
179 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
180 .mr = 9,
181 .qr = 8,
182 };
183 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
184 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
185 .mr = 4,
186 };
187 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
188 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
189 .mr = 9,
190 };
191 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
192 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
193 .mr = 9,
194 .qr = 8,
195 };
Marat Dukhan69722492019-11-11 19:55:50 -0800196 xnn_params.f32.bilinear = (struct bilinear_parameters) {
197 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__neon_c8,
198 .pixel_tile = 1,
199 .channel_tile = 8,
200 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700201 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
202 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neon;
203 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800204 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__neon_2x8,
205 .row_tile = 2,
206 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700207 };
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800208 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__neon_x8;
Marat Dukhanca2733c2019-11-15 23:21:17 -0800209 xnn_params.f32.vmul = (struct vbinop_parameters) {
210 .op_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmul_ukernel__neon_x8,
211 .opc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
212 .ropc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
213 .element_tile = 8,
214 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700215 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -0800216 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neon_2x,
217 .channel_tile = 4,
218 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700219 };
220 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700221
222 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700223 #ifndef XNN_NO_X32_OPERATORS
224 xnn_params.x32.pad = (struct pad_parameters) {
225 .ukernel = xnn_x32_pad_x2__neon,
226 .mr = 2,
227 };
228 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
229 xnn_params.x32.zip = (struct zip_parameters) {
230 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
231 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
232 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
233 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
234 };
235 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700236
#elif XNN_ARCH_ARM64

  /**************************** Q8 micro-kernels ****************************/
  #ifndef XNN_NO_Q8_OPERATORS
    xnn_params.q8.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_8x8__neon,
      .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_8x8__neon,
      .mr = 8,
      .nr = 8,
    };
    xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
      .cr = 8,
      .mr = 9,
    };
    xnn_params.q8.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
      .mr = 7,
    };
    xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
  #endif  // XNN_NO_Q8_OPERATORS

  /**************************** U8 micro-kernels ****************************/
  #ifndef XNN_NO_U8_OPERATORS
    xnn_params.u8.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
    xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
    xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
  #endif  // XNN_NO_U8_OPERATORS

  /**************************** X8 micro-kernels ****************************/
  #ifndef XNN_NO_X8_OPERATORS
    xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
    xnn_params.x8.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
      .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
      .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
      .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
    };
  #endif  // XNN_NO_X8_OPERATORS

  /**************************** F32 micro-kernels ****************************/
  #ifndef XNN_NO_F32_OPERATORS
    #if XNN_ENABLE_ASSEMBLY
      switch (cpuinfo_get_core(0)->uarch) {
        case cpuinfo_uarch_kryo:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a57,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .mr = 4,
            .nr = 8,
          };
          break;
        case cpuinfo_uarch_cortex_a57:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
            .mr = 6,
            .nr = 8,
          };
          break;
        case cpuinfo_uarch_cortex_a72:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .mr = 4,
            .nr = 8,
          };
          break;
        case cpuinfo_uarch_cortex_a75:
        case cpuinfo_uarch_cortex_a76:
        case cpuinfo_uarch_mongoose_m1:
        case cpuinfo_uarch_mongoose_m2:
        case cpuinfo_uarch_meerkat_m3:
        case (cpuinfo_uarch_meerkat_m3 + 1):
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .mr = 6,
            .nr = 8,
          };
          break;
        case cpuinfo_uarch_cortex_a53:
        case cpuinfo_uarch_cortex_a55:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a53,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a53,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a53,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a53,
            .mr = 6,
            .nr = 8,
          };
          break;
        case cpuinfo_uarch_cortex_a73:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .mr = 6,
            .nr = 8,
          };
          break;
        default:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__neonfma_ld64,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__neonfma_ld64,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .mr = 6,
            .nr = 8,
          };
          break;
      }
    #else  // XNN_ENABLE_ASSEMBLY
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__neonfma_ld64,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__neonfma_ld64,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neonfma_ld64,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neonfma_ld64,
        .mr = 6,
        .nr = 8,
      };
    #endif

    xnn_params.f32.gemm2 = (struct gemm_parameters) {
      .gemm = NULL,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neonfma_ld64,
      .mr = 4,
      .nr = 2,
    };
    xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
      .cr = 4,
      .mr = 4,
    };
    switch (cpuinfo_get_core(0)->uarch) {
      case cpuinfo_uarch_kryo:
        xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
          .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neonfma,
          .cr = 4,
          .mr = 9,
        };
        break;
#if XNN_ENABLE_ASSEMBLY
      case cpuinfo_uarch_cortex_a53:
      case cpuinfo_uarch_cortex_a55:
        xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
          .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55,
          .cr = 4,
          .mr = 9,
        };
        break;
#endif
      default:
        xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
          .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__neonfma,
          .cr = 8,
          .mr = 9,
        };
        break;
    }
    xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
      .cr = 4,
      .mr = 25,
    };
    xnn_params.f32.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
      .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
      .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
      .mr = 7,
    };
    xnn_params.f32.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
      .mr = 4,
    };
    xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
      .mr = 9,
    };
    xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
      .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.bilinear = (struct bilinear_parameters) {
      .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__neonfma_c8,
      .pixel_tile = 1,
      .channel_tile = 8,
    };
    xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
    xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neonfma;
    xnn_params.f32.sigmoid = (xnn_univector_ukernel_function) xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16;
    xnn_params.f32.prelu = (struct prelu_parameters) {
      .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__neon_2x8,
      .row_tile = 2,
      .channel_tile = 8,
    };
    xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__neon_x8;
    xnn_params.f32.vmul = (struct vbinop_parameters) {
      .op_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmul_ukernel__neon_x8,
      .opc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
      .ropc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
      .element_tile = 8,
    };
    xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
      .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neonfma_2x,
      .channel_tile = 4,
      .row_tile = 2,
    };
    #ifndef XNN_NO_NCHW_OPERATORS
      xnn_params.f32.spmm = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x1__neonfma_pipelined,
        .mr = 16,
        .nr = 1,
      };
      xnn_params.f32.spmm2 = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x2__neonfma,
        .mr = 16,
        .nr = 2,
      };
      xnn_params.f32.spmm4 = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x4__neonfma,
        .mr = 16,
        .nr = 4,
      };
      xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
        .ukernel_with_symm_padding =
          (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2,
        .output_channel_tile = 4,
        .output_height_tile = 2,
        .output_width_tile = 2,
      };
      xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma,
        .input_width_tile = 4,
        .output_width_tile = 4,
        .output_height_tile = 3,
      };
      xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma,
        .input_width_tile = 4,
        .output_width_tile = 4,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_dwconv5x5 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma,
        .input_width_tile = 4,
        .output_width_tile = 4,
        .output_height_tile = 2,
      };
      xnn_params.f32.spchw_dwconv5x5s2 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma,
        .input_width_tile = 4,
        .output_width_tile = 4,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
        .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__neon_x4,
        .channel_tile = 4,
      };
    #endif  // XNN_NO_NCHW_OPERATORS
  #endif  // XNN_NO_F32_OPERATORS

  /**************************** X32 micro-kernels ****************************/
  #ifndef XNN_NO_X32_OPERATORS
    xnn_params.x32.pad = (struct pad_parameters) {
      .ukernel = xnn_x32_pad_x2__neon,
      .mr = 2,
    };
    xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
    xnn_params.x32.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
      .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
      .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
      .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
    };
  #endif  // XNN_NO_X32_OPERATORS

#elif XNN_ARCH_X86 || XNN_ARCH_X86_64
  if (!cpuinfo_has_x86_sse2()) {
    xnn_log_error("XNNPACK initialization failed: SSE2 is not supported");
    return;
  }

  /**************************** Q8 micro-kernels ****************************/
  #ifndef XNN_NO_Q8_OPERATORS
    xnn_params.q8.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x4c2__sse2,
      .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x4c2__sse2,
      .mr = 4,
      .nr = 4,
      .log2_kr = 1,
    };
    xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__sse2,
      .cr = 8,
      .mr = 9,
    };
    xnn_params.q8.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__sse2,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__sse2,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__sse2,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__sse2,
      .mr = 7,
    };
    xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__sse2;
  #endif  // XNN_NO_Q8_OPERATORS

  /**************************** U8 micro-kernels ****************************/
  #ifndef XNN_NO_U8_OPERATORS
    xnn_params.u8.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__sse2,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__sse2;
    xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
    xnn_params.u8.rmax = xnn_u8_rmax_ukernel__sse2;
  #endif  // XNN_NO_U8_OPERATORS

  /**************************** X8 micro-kernels ****************************/
  #ifndef XNN_NO_X8_OPERATORS
    xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
    xnn_params.x8.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__sse2,
      .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__sse2,
      .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__sse2,
      .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__sse2,
    };
  #endif  // XNN_NO_X8_OPERATORS

  /**************************** F32 micro-kernels ****************************/
  #ifndef XNN_NO_F32_OPERATORS
    xnn_params.f32.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__sse_load1,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__sse_load1,
      .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__sse_load1,
      .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__sse_load1,
      .mr = 4,
      .nr = 8,
    };
    xnn_params.f32.gemm2 = (struct gemm_parameters) {
      .gemm = NULL,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__sse,
      .mr = 4,
      .nr = 2,
      .log2_kr = 2,
    };
    xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x4__sse,
      .cr = 8,
      .mr = 4,
    };
    xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__sse,
      .cr = 8,
      .mr = 9,
    };
    xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x25__sse,
      .cr = 8,
      .mr = 25,
    };
    xnn_params.f32.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__sse,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__sse,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
      .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__sse,
      .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__sse,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__sse,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__sse,
      .mr = 7,
    };
    xnn_params.f32.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__sse,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__sse2,
      .mr = 4,
    };
    xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__sse2,
      .mr = 9,
    };
    xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
      .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__sse2,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.bilinear = (struct bilinear_parameters) {
      .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__sse_c8,
      .pixel_tile = 1,
      .channel_tile = 8,
    };
    xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__sse;
    xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__sse;
    xnn_params.f32.sigmoid = (xnn_univector_ukernel_function) xnn_f32_sigmoid_ukernel__sse2_p5_div_x16;
    xnn_params.f32.prelu = (struct prelu_parameters) {
      .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__sse2_2x8,
      .row_tile = 2,
      .channel_tile = 8,
    };
    xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__sse_x8;
    xnn_params.f32.vmul = (struct vbinop_parameters) {
      .op_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmul_ukernel__sse_x8,
      .opc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__sse_x8,
      .ropc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__sse_x8,
      .element_tile = 8,
    };
    xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
      .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__sse_2x,
      .channel_tile = 4,
      .row_tile = 2,
    };
    #ifndef XNN_NO_NCHW_OPERATORS
      xnn_params.f32.spmm = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__sse,
        .mr = 4,
        .nr = 1,
      };
      xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__sse,
        .input_width_tile = 4,
        .output_width_tile = 4,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse,
        .input_width_tile = 4,
        .output_width_tile = 4,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
        .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__sse_x4,
        .channel_tile = 4,
      };
    #endif  // XNN_NO_NCHW_OPERATORS
  #endif  // XNN_NO_F32_OPERATORS

  /**************************** X32 micro-kernels ****************************/
  #ifndef XNN_NO_X32_OPERATORS
    xnn_params.x32.pad = (struct pad_parameters) {
      .ukernel = xnn_x32_pad_x2__sse2,
      .mr = 2,
    };
    xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
    xnn_params.x32.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__sse2,
      .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__sse2,
      .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__sse2,
      .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__sse2,
    };
  #endif  // XNN_NO_X32_OPERATORS

#elif XNN_ARCH_PNACL || XNN_ARCH_WASMSIMD
  // Unlike most other architectures, on x86/x86-64 a floating-point instruction with
  // no NaN arguments that produces a NaN output sets the sign bit of that NaN.
  // We use this to distinguish x86/x86-64 from other architectures by subtracting
  // two infinities (which must produce a NaN per the IEEE 754 standard).
  static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
  const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;
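  // Illustrative bit patterns (assuming IEEE 754 binary32 and that the WASM engine
  // forwards the host's NaN encoding): -Inf is 0xFF800000, and -Inf - (-Inf) is an
  // invalid operation that returns a quiet NaN. x86 SSE returns the "indefinite"
  // NaN 0xFFC00000, which is negative when reinterpreted as int32, while ARM and
  // most other targets return the default NaN 0x7FC00000, which is non-negative,
  // so the comparison above evaluates to false there.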

  /**************************** Q8 micro-kernels ****************************/
  #ifndef XNN_NO_Q8_OPERATORS
    xnn_params.q8.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
      .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
      .mr = 2,
      .nr = 2,
    };
    xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
      .cr = 1,
      .mr = 9,
    };
    xnn_params.q8.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
      .mr = 7,
    };
    xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
  #endif  // XNN_NO_Q8_OPERATORS

  /**************************** U8 micro-kernels ****************************/
  #ifndef XNN_NO_U8_OPERATORS
    xnn_params.u8.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
    xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
    xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
  #endif  // XNN_NO_U8_OPERATORS

  /**************************** X8 micro-kernels ****************************/
  #ifndef XNN_NO_X8_OPERATORS
    xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
    xnn_params.x8.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
      .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
      .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
      .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
    };
  #endif  // XNN_NO_X8_OPERATORS

  /**************************** F32 micro-kernels ****************************/
  #ifndef XNN_NO_F32_OPERATORS
    if (is_wasm_x86) {
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__psimd_splat,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__psimd_splat,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__psimd_splat,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__psimd_splat,
        .mr = 4,
        .nr = 8,
      };
    } else {
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8s4__psimd,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8s4__psimd,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8s4__psimd,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__psimd,
        .mr = 6,
        .nr = 8,
        .log2_sr = 2,
      };
    }
    xnn_params.f32.gemm2 = (struct gemm_parameters) {
      .gemm = NULL,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__psimd,
      .mr = 4,
      .nr = 2,
      .log2_kr = 2,
    };
    xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd_acc2,
      .cr = 4,
      .mr = 4,
    };
    xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__psimd_acc2,
      .cr = 4,
      .mr = 9,
    };
    xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd_acc2,
      .cr = 4,
      .mr = 25,
    };
    xnn_params.f32.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__psimd,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
      .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__psimd,
      .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__psimd,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__psimd,
      .mr = 7,
    };
    xnn_params.f32.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
      .mr = 4,
    };
    xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
      .mr = 9,
    };
    xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
      .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.bilinear = (struct bilinear_parameters) {
      .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__psimd_c8,
      .pixel_tile = 1,
      .channel_tile = 8,
    };
    xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__psimd;
    xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__psimd;
    xnn_params.f32.prelu = (struct prelu_parameters) {
      .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__psimd_2x8,
      .row_tile = 2,
      .channel_tile = 8,
    };
    xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd_x8;
    xnn_params.f32.vmul = (struct vbinop_parameters) {
      .op_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmul_ukernel__psimd_x8,
      .opc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__psimd_x8,
      .ropc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__psimd_x8,
      .element_tile = 8,
    };
    xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
      .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__psimd_2x,
      .channel_tile = 4,
      .row_tile = 2,
    };
  #endif  // XNN_NO_F32_OPERATORS

  /**************************** X32 micro-kernels ****************************/
  #ifndef XNN_NO_X32_OPERATORS
    xnn_params.x32.pad = (struct pad_parameters) {
      .ukernel = xnn_x32_pad_x2__psimd,
      .mr = 2,
    };
    xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
    xnn_params.x32.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__psimd,
      .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__psimd,
      .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__psimd,
      .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__psimd,
    };
  #endif  // XNN_NO_X32_OPERATORS

#elif XNN_ARCH_WASM || XNN_ARCH_ASMJS
  // Unlike most other architectures, on x86/x86-64 a floating-point instruction with
  // no NaN arguments that produces a NaN output sets the sign bit of that NaN.
  // We use this to distinguish x86/x86-64 from other architectures by subtracting
  // two infinities (which must produce a NaN per the IEEE 754 standard).
  static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
  const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;

  /**************************** Q8 micro-kernels ****************************/
  #ifndef XNN_NO_Q8_OPERATORS
    xnn_params.q8.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
      .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
      .mr = 2,
      .nr = 2,
    };
    xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
      .cr = 1,
      .mr = 9,
    };
    xnn_params.q8.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
      .mr = 7,
    };
    xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
  #endif  // XNN_NO_Q8_OPERATORS

  /**************************** U8 micro-kernels ****************************/
  #ifndef XNN_NO_U8_OPERATORS
    xnn_params.u8.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
    xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
    xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
  #endif  // XNN_NO_U8_OPERATORS

  /**************************** X8 micro-kernels ****************************/
  #ifndef XNN_NO_X8_OPERATORS
    xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
    xnn_params.x8.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
      .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
      .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
      .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
    };
  #endif  // XNN_NO_X8_OPERATORS

  /**************************** F32 micro-kernels ****************************/
  #ifndef XNN_NO_F32_OPERATORS
    if (is_wasm_x86) {
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_2x4__scalar,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_2x4__scalar,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
        .mr = 2,
        .nr = 4,
      };
    } else {
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x4__scalar,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x4__scalar,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
        .mr = 4,
        .nr = 4,
      };
    }
    xnn_params.f32.gemm2 = (struct gemm_parameters) {
      .gemm = NULL,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__scalar,
      .mr = 4,
      .nr = 2,
    };
    xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x4__scalar_acc2,
      .cr = 1,
      .mr = 4,
    };
    xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x9__scalar_acc2,
      .cr = 1,
      .mr = 9,
    };
    xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x25__scalar_acc2,
      .cr = 1,
      .mr = 25,
    };
    xnn_params.f32.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__scalar,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
      .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__scalar,
      .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__scalar,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__scalar,
      .mr = 7,
    };
    xnn_params.f32.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__scalar,
      .mr = 4,
    };
    xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__scalar,
      .mr = 9,
    };
    xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
      .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.bilinear = (struct bilinear_parameters) {
      .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__scalar_c2,
      .pixel_tile = 1,
      .channel_tile = 2,
    };
    xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__scalar;
    xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__scalar;
    xnn_params.f32.prelu = (struct prelu_parameters) {
      .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__scalar_2x4,
      .row_tile = 4,
      .channel_tile = 4,
    };
    xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__scalar_x4;
    xnn_params.f32.vmul = (struct vbinop_parameters) {
      .op_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmul_ukernel__scalar_x4,
      .opc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__scalar_x4,
      .ropc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__scalar_x4,
      .element_tile = 8,
    };
    xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
      .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c1__scalar_2x,
      .channel_tile = 1,
      .row_tile = 2,
    };
    #ifndef XNN_NO_NCHW_OPERATORS
      xnn_params.f32.spmm = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x1__scalar,
        .mr = 8,
        .nr = 1,
      };
      xnn_params.f32.spmm2 = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x2__scalar,
        .mr = 8,
        .nr = 2,
      };
      xnn_params.f32.spmm4 = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x4__scalar,
        .mr = 8,
        .nr = 4,
      };
      xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
        .ukernel_with_symm_padding =
          (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1,
        .output_channel_tile = 4,
        .output_height_tile = 1,
        .output_width_tile = 1,
      };
      xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar,
        .input_width_tile = 1,
        .output_width_tile = 1,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar,
        .input_width_tile = 1,
        .output_width_tile = 1,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_dwconv5x5 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar,
        .input_width_tile = 1,
        .output_width_tile = 1,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_dwconv5x5s2 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar,
        .input_width_tile = 1,
        .output_width_tile = 1,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
        .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__scalar_x1,
        .channel_tile = 1,
      };
    #endif  // XNN_NO_NCHW_OPERATORS
  #endif  // XNN_NO_F32_OPERATORS

  /**************************** X32 micro-kernels ****************************/
  #ifndef XNN_NO_X32_OPERATORS
    xnn_params.x32.pad = (struct pad_parameters) {
      .ukernel = xnn_x32_pad_x2__scalar,
      .mr = 2,
    };
    xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__scalar;
    xnn_params.x32.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__scalar,
      .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__scalar,
      .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__scalar,
      .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__scalar,
    };
  #endif  // XNN_NO_X32_OPERATORS

#else
  #error "Unsupported architecture"
#endif
  xnn_params.initialized = true;
}

enum xnn_status xnn_initialize(void) {
  #ifndef __EMSCRIPTEN__
    if (!cpuinfo_initialize()) {
      return xnn_status_out_of_memory;
    }
  #endif
  pthread_once(&init_guard, &init);
  if (xnn_params.initialized) {
    return xnn_status_success;
  } else {
    return xnn_status_unsupported_hardware;
  }
}

enum xnn_status xnn_deinitialize(void) {
  #ifndef __EMSCRIPTEN__
    cpuinfo_deinitialize();
  #endif
  return xnn_status_success;
}
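
// A minimal usage sketch (illustrative only, not part of this file): callers are
// expected to run xnn_initialize() once before creating any operators, check its
// status, and call xnn_deinitialize() when they are done with XNNPACK, e.g.:
//
//   if (xnn_initialize() != xnn_status_success) {
//     // Required ISA features (NEON/SSE2) are unavailable; fall back to another backend.
//     return;
//   }
//   /* ... create, setup, and run XNNPACK operators declared in <xnnpack.h> ... */
//   xnn_deinitialize();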