// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include <pthread.h>

#ifndef __EMSCRIPTEN__
  #include <cpuinfo.h>
#endif

#include <xnnpack.h>
#include <xnnpack/argmaxpool.h>
#include <xnnpack/avgpool.h>
#include <xnnpack/bilinear.h>
#include <xnnpack/clamp.h>
#include <xnnpack/common.h>
#include <xnnpack/conv.h>
#include <xnnpack/dwconv.h>
#include <xnnpack/gavgpool.h>
#include <xnnpack/gemm.h>
#include <xnnpack/hswish.h>
#include <xnnpack/igemm.h>
#include <xnnpack/log.h>
#include <xnnpack/lut.h>
#include <xnnpack/maxpool.h>
#include <xnnpack/memory.h>
#include <xnnpack/pad.h>
#include <xnnpack/params.h>
#include <xnnpack/pavgpool.h>
#include <xnnpack/prelu.h>
#include <xnnpack/rmax.h>
#include <xnnpack/spmm.h>
#include <xnnpack/unpool.h>
#include <xnnpack/vadd.h>
#include <xnnpack/vbinop.h>
#include <xnnpack/vmulcaddc.h>
#include <xnnpack/vunop.h>
#include <xnnpack/zip.h>

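// Hand-written assembly microkernels are used by default; building with
// XNN_ENABLE_ASSEMBLY=0 restricts the selection below to C/intrinsics implementations.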
#ifndef XNN_ENABLE_ASSEMBLY
  #define XNN_ENABLE_ASSEMBLY 1
#endif

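// Ensures that init() runs at most once, even when xnn_initialize() is called concurrently.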
static pthread_once_t init_guard = PTHREAD_ONCE_INIT;

struct xnn_parameters xnn_params = {
  .initialized = false
};

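// Stubs for runtime probing under WebAssembly/PNaCl; they are expected to be provided
// outside this translation unit (e.g. by the Emscripten JavaScript glue) and are used
// below to inspect the floating-point behavior of the host.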
#if XNN_ARCH_PNACL || XNN_ARCH_ASMJS || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
  extern uint32_t xnn_stub_wasm_f32_sub(uint32_t a, uint32_t b);
#endif
#if XNN_ARCH_PNACL || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
  extern uint32_t xnn_stub_wasm_f32_min(uint32_t a, uint32_t b);
#endif

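// Selects, once per process, the preferred microkernel for every operator type on the
// current CPU and records the choices in xnn_params.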
static void init(void) {
#if XNN_ARCH_ARM
  if (!cpuinfo_has_arm_neon()) {
    xnn_log_error("XNNPACK initialization failed: NEON is not supported");
    return;
  }

  /**************************** Q8 micro-kernels ****************************/
  #ifndef XNN_NO_Q8_OPERATORS
    xnn_params.q8.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x8__neon,
      .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x8__neon,
      .mr = 4,
      .nr = 8,
    };

    #if XNN_ENABLE_ASSEMBLY
      xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
        .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__aarch32_neon,
        .cr = 8,
        .mr = 9,
      };
    #else
      xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
        .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
        .cr = 8,
        .mr = 9,
      };
    #endif
    xnn_params.q8.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
      .mr = 7,
    };
    xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
  #endif  // XNN_NO_Q8_OPERATORS

  /**************************** U8 micro-kernels ****************************/
  #ifndef XNN_NO_U8_OPERATORS
    xnn_params.u8.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
    xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
    xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
  #endif  // XNN_NO_U8_OPERATORS

  /**************************** X8 micro-kernels ****************************/
  #ifndef XNN_NO_X8_OPERATORS
    xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
    xnn_params.x8.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
      .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
      .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
      .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
    };
  #endif  // XNN_NO_X8_OPERATORS

  /**************************** F32 micro-kernels ****************************/
  #ifndef XNN_NO_F32_OPERATORS
    xnn_params.f32.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neon_ld128,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neon_ld128,
      .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_ld64,
      .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_ld64,
      .mr = 4,
      .nr = 8,
    };
    xnn_params.f32.gemm2 = (struct gemm_parameters) {
      .gemm = NULL,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neon_ld64,
      .mr = 4,
      .nr = 2,
    };
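    // Depthwise convolution microkernels, indexed by kernel size: 4 taps (2x2), 9 taps (3x3),
    // and 25 taps (5x5); cr is the channel tile, mr the number of taps the unipass kernel handles.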
    xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
      .cr = 4,
      .mr = 4,
    };
    xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neon,
      .cr = 4,
      .mr = 9,
    };
    xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
      .cr = 4,
      .mr = 25,
    };
    xnn_params.f32.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
      .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
      .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
      .mr = 7,
    };
    xnn_params.f32.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
      .mr = 4,
    };
    xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
      .mr = 9,
    };
    xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
      .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.bilinear = (struct bilinear_parameters) {
      .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__neon_c8,
      .pixel_tile = 1,
      .channel_tile = 8,
    };
    xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
    xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neon;
    xnn_params.f32.prelu = (struct prelu_parameters) {
      .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__neon_2x8,
      .row_tile = 2,
      .channel_tile = 8,
    };
    xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__neon_x8;
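    // Elementwise multiplication: op = vector*vector, opc = vector*constant, ropc = constant*vector;
    // the reversed variant reuses the same microkernel because multiplication is commutative.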
    xnn_params.f32.vmul = (struct vbinop_parameters) {
      .op_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmul_ukernel__neon_x8,
      .opc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
      .ropc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
      .element_tile = 8,
    };
    xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
      .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neon_2x,
      .channel_tile = 4,
      .row_tile = 2,
    };
  #endif  // XNN_NO_F32_OPERATORS

  /**************************** X32 micro-kernels ****************************/
  #ifndef XNN_NO_X32_OPERATORS
    xnn_params.x32.pad = (struct pad_parameters) {
      .ukernel = xnn_x32_pad_x2__neon,
      .mr = 2,
    };
    xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
    xnn_params.x32.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
      .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
      .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
      .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
    };
  #endif  // XNN_NO_X32_OPERATORS

#elif XNN_ARCH_ARM64

  /**************************** Q8 micro-kernels ****************************/
  #ifndef XNN_NO_Q8_OPERATORS
    xnn_params.q8.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_8x8__neon,
      .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_8x8__neon,
      .mr = 8,
      .nr = 8,
    };
    xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
      .cr = 8,
      .mr = 9,
    };
    xnn_params.q8.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
      .mr = 7,
    };
    xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
  #endif  // XNN_NO_Q8_OPERATORS

  /**************************** U8 micro-kernels ****************************/
  #ifndef XNN_NO_U8_OPERATORS
    xnn_params.u8.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
    xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
    xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
  #endif  // XNN_NO_U8_OPERATORS

  /**************************** X8 micro-kernels ****************************/
  #ifndef XNN_NO_X8_OPERATORS
    xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
    xnn_params.x8.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
      .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
      .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
      .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
    };
  #endif  // XNN_NO_X8_OPERATORS

  /**************************** F32 micro-kernels ****************************/
  #ifndef XNN_NO_F32_OPERATORS
    #if XNN_ENABLE_ASSEMBLY
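      // Pick GEMM/IGEMM tiles tuned for the microarchitecture of core #0, as reported by cpuinfo.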
      switch (cpuinfo_get_core(0)->uarch) {
        case cpuinfo_uarch_kryo:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a57,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .mr = 4,
            .nr = 8,
          };
          break;
        case cpuinfo_uarch_cortex_a57:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
            .mr = 6,
            .nr = 8,
          };
          break;
        case cpuinfo_uarch_cortex_a72:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .mr = 4,
            .nr = 8,
          };
          break;
        case cpuinfo_uarch_cortex_a75:
        case cpuinfo_uarch_cortex_a76:
        case cpuinfo_uarch_mongoose_m1:
        case cpuinfo_uarch_mongoose_m2:
        case cpuinfo_uarch_meerkat_m3:
        case (cpuinfo_uarch_meerkat_m3 + 1):
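        // (cpuinfo_uarch_meerkat_m3 + 1) presumably stands for the next Samsung big core,
        // which the bundled cpuinfo version does not yet name.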
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .mr = 6,
            .nr = 8,
          };
          break;
        case cpuinfo_uarch_cortex_a53:
        case cpuinfo_uarch_cortex_a55:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a53,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a53,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a53,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a53,
            .mr = 6,
            .nr = 8,
          };
          break;
        case cpuinfo_uarch_cortex_a73:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .mr = 6,
            .nr = 8,
          };
          break;
        default:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__neonfma_ld64,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__neonfma_ld64,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .mr = 6,
            .nr = 8,
          };
          break;
      }
    #else  // XNN_ENABLE_ASSEMBLY
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__neonfma_ld64,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__neonfma_ld64,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neonfma_ld64,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neonfma_ld64,
        .mr = 6,
        .nr = 8,
      };
    #endif

    xnn_params.f32.gemm2 = (struct gemm_parameters) {
      .gemm = NULL,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neonfma_ld64,
      .mr = 4,
      .nr = 2,
    };
    xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
      .cr = 4,
      .mr = 4,
    };
    switch (cpuinfo_get_core(0)->uarch) {
      case cpuinfo_uarch_kryo:
        xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
          .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neonfma,
          .cr = 4,
          .mr = 9,
        };
        break;
#if XNN_ENABLE_ASSEMBLY
      case cpuinfo_uarch_cortex_a53:
      case cpuinfo_uarch_cortex_a55:
        xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
          .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55,
          .cr = 4,
          .mr = 9,
        };
        break;
#endif
      default:
        xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
          .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__neonfma,
          .cr = 8,
          .mr = 9,
        };
        break;
    }
    xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
      .cr = 4,
      .mr = 25,
    };
    xnn_params.f32.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
      .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
      .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
      .mr = 7,
    };
    xnn_params.f32.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
      .mr = 4,
    };
    xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
      .mr = 9,
    };
    xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
      .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.bilinear = (struct bilinear_parameters) {
      .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__neonfma_c8,
      .pixel_tile = 1,
      .channel_tile = 8,
    };
    xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
    xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neonfma;
    xnn_params.f32.sigmoid = (xnn_univector_ukernel_function) xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16;
    xnn_params.f32.prelu = (struct prelu_parameters) {
      .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__neon_2x8,
      .row_tile = 2,
      .channel_tile = 8,
    };
    xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__neon_x8;
    xnn_params.f32.vmul = (struct vbinop_parameters) {
      .op_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmul_ukernel__neon_x8,
      .opc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
      .ropc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
      .element_tile = 8,
    };
    xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
      .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neonfma_2x,
      .channel_tile = 4,
      .row_tile = 2,
    };
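    // Sparse GEMM (spmm) and CHW-layout (spchw) microkernels used by the NCHW operator path.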
    #ifndef XNN_NO_NCHW_OPERATORS
      xnn_params.f32.spmm = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x1__neonfma_pipelined,
        .mr = 16,
        .nr = 1,
      };
      xnn_params.f32.spmm2 = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x2__neonfma,
        .mr = 16,
        .nr = 2,
      };
      xnn_params.f32.spmm4 = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x4__neonfma,
        .mr = 16,
        .nr = 4,
      };
      xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
        .ukernel_with_symm_padding =
          (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2,
        .output_channel_tile = 4,
        .output_height_tile = 2,
        .output_width_tile = 2,
      };
      xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma,
        .input_width_tile = 4,
        .output_width_tile = 4,
        .output_height_tile = 3,
      };
      xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma,
        .input_width_tile = 4,
        .output_width_tile = 4,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_dwconv5x5 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma,
        .input_width_tile = 4,
        .output_width_tile = 4,
        .output_height_tile = 3,
      };
      xnn_params.f32.spchw_dwconv5x5s2 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma,
        .input_width_tile = 4,
        .output_width_tile = 4,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
        .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__neon_x4,
        .channel_tile = 4,
      };
    #endif  // XNN_NO_NCHW_OPERATORS
  #endif  // XNN_NO_F32_OPERATORS

  /**************************** X32 micro-kernels ****************************/
  #ifndef XNN_NO_X32_OPERATORS
    xnn_params.x32.pad = (struct pad_parameters) {
      .ukernel = xnn_x32_pad_x2__neon,
      .mr = 2,
    };
    xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
    xnn_params.x32.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
      .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
      .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
      .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
    };
  #endif  // XNN_NO_X32_OPERATORS

#elif XNN_ARCH_X86 || XNN_ARCH_X86_64
  if (!cpuinfo_has_x86_sse2()) {
    xnn_log_error("XNNPACK initialization failed: SSE2 is not supported");
    return;
  }

  /**************************** Q8 micro-kernels ****************************/
  #ifndef XNN_NO_Q8_OPERATORS
    xnn_params.q8.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x4c2__sse2,
      .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x4c2__sse2,
      .mr = 4,
      .nr = 4,
      .log2_kr = 1,
    };
    xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__sse2,
      .cr = 8,
      .mr = 9,
    };
    xnn_params.q8.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__sse2,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__sse2,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__sse2,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__sse2,
      .mr = 7,
    };
    xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__sse2;
  #endif  // XNN_NO_Q8_OPERATORS

  /**************************** U8 micro-kernels ****************************/
  #ifndef XNN_NO_U8_OPERATORS
    xnn_params.u8.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__sse2,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__sse2;
    xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
    xnn_params.u8.rmax = xnn_u8_rmax_ukernel__sse2;
  #endif  // XNN_NO_U8_OPERATORS

  /**************************** X8 micro-kernels ****************************/
  #ifndef XNN_NO_X8_OPERATORS
    xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
    xnn_params.x8.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__sse2,
      .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__sse2,
      .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__sse2,
      .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__sse2,
    };
  #endif  // XNN_NO_X8_OPERATORS

  /**************************** F32 micro-kernels ****************************/
  #ifndef XNN_NO_F32_OPERATORS
    xnn_params.f32.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__sse_load1,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__sse_load1,
      .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__sse_load1,
      .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__sse_load1,
      .mr = 4,
      .nr = 8,
    };
    xnn_params.f32.gemm2 = (struct gemm_parameters) {
      .gemm = NULL,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__sse,
      .mr = 4,
      .nr = 2,
      .log2_kr = 2,
    };
    xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x4__sse,
      .cr = 8,
      .mr = 4,
    };
    xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__sse,
      .cr = 8,
      .mr = 9,
    };
    xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x25__sse,
      .cr = 8,
      .mr = 25,
    };
    xnn_params.f32.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__sse,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__sse,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
      .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__sse,
      .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__sse,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__sse,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__sse,
      .mr = 7,
    };
    xnn_params.f32.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__sse,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__sse2,
      .mr = 4,
    };
    xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__sse2,
      .mr = 9,
    };
    xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
      .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__sse2,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.bilinear = (struct bilinear_parameters) {
      .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__sse_c8,
      .pixel_tile = 1,
      .channel_tile = 8,
    };
    xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__sse;
    xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__sse;
    xnn_params.f32.sigmoid = (xnn_univector_ukernel_function) xnn_f32_sigmoid_ukernel__sse2_p5_div_x16;
    xnn_params.f32.prelu = (struct prelu_parameters) {
      .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__sse2_2x8,
      .row_tile = 2,
      .channel_tile = 8,
    };
    xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__sse_x8;
    xnn_params.f32.vmul = (struct vbinop_parameters) {
      .op_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmul_ukernel__sse_x8,
      .opc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__sse_x8,
      .ropc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__sse_x8,
      .element_tile = 8,
    };
    xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
      .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__sse_2x,
      .channel_tile = 4,
      .row_tile = 2,
    };
    #ifndef XNN_NO_NCHW_OPERATORS
      xnn_params.f32.spmm = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__sse,
        .mr = 4,
        .nr = 1,
      };
      xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__sse,
        .input_width_tile = 4,
        .output_width_tile = 4,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse,
        .input_width_tile = 4,
        .output_width_tile = 4,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
        .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__sse_x4,
        .channel_tile = 4,
      };
    #endif  // XNN_NO_NCHW_OPERATORS
  #endif  // XNN_NO_F32_OPERATORS

  /**************************** X32 micro-kernels ****************************/
  #ifndef XNN_NO_X32_OPERATORS
    xnn_params.x32.pad = (struct pad_parameters) {
      .ukernel = xnn_x32_pad_x2__sse2,
      .mr = 2,
    };
    xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
    xnn_params.x32.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__sse2,
      .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__sse2,
      .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__sse2,
      .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__sse2,
    };
  #endif  // XNN_NO_X32_OPERATORS

#elif XNN_ARCH_PNACL || XNN_ARCH_WASMSIMD
  // Unlike most other architectures, on x86/x86-64 when floating-point instructions
  // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
  // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
  // of two infinities (must produce NaN per IEEE 754 standard).
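  // For example, -Inf - (-Inf) yields the default quiet NaN: 0xFFC00000 (sign bit set) on
  // x86/x86-64, but 0x7FC00000 on ARM and most other CPUs, so a negative result from the
  // reinterpreted subtraction below identifies an x86-based host.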
  static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
  const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;

  /**************************** Q8 micro-kernels ****************************/
  #ifndef XNN_NO_Q8_OPERATORS
    xnn_params.q8.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
      .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
      .mr = 2,
      .nr = 2,
    };
    xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
      .cr = 1,
      .mr = 9,
    };
    xnn_params.q8.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
      .mr = 7,
    };
    xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
  #endif  // XNN_NO_Q8_OPERATORS

  /**************************** U8 micro-kernels ****************************/
  #ifndef XNN_NO_U8_OPERATORS
    xnn_params.u8.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
    xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
    xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
  #endif  // XNN_NO_U8_OPERATORS

  /**************************** X8 micro-kernels ****************************/
  #ifndef XNN_NO_X8_OPERATORS
    xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
    xnn_params.x8.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
      .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
      .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
      .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
    };
  #endif  // XNN_NO_X8_OPERATORS

  /**************************** F32 micro-kernels ****************************/
  #ifndef XNN_NO_F32_OPERATORS
    if (is_wasm_x86) {
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__psimd_splat,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__psimd_splat,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__psimd_splat,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__psimd_splat,
        .mr = 4,
        .nr = 8,
      };
    } else {
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8s4__psimd,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8s4__psimd,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8s4__psimd,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__psimd,
        .mr = 6,
        .nr = 8,
        .log2_sr = 2,
      };
    }
    xnn_params.f32.gemm2 = (struct gemm_parameters) {
      .gemm = NULL,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__psimd,
      .mr = 4,
      .nr = 2,
      .log2_kr = 2,
    };
    xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd_acc2,
      .cr = 4,
      .mr = 4,
    };
    xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__psimd_acc2,
      .cr = 4,
      .mr = 9,
    };
    xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd_acc2,
      .cr = 4,
      .mr = 25,
    };
    xnn_params.f32.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__psimd,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
      .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__psimd,
      .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__psimd,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__psimd,
      .mr = 7,
    };
    xnn_params.f32.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
      .mr = 4,
    };
    xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
      .mr = 9,
    };
    xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
      .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.bilinear = (struct bilinear_parameters) {
      .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__psimd_c8,
      .pixel_tile = 1,
      .channel_tile = 8,
    };
    xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__psimd;
    xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__psimd;
    xnn_params.f32.prelu = (struct prelu_parameters) {
      .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__psimd_2x8,
      .row_tile = 2,
      .channel_tile = 8,
    };
    xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd_x8;
    xnn_params.f32.vmul = (struct vbinop_parameters) {
      .op_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmul_ukernel__psimd_x8,
      .opc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__psimd_x8,
      .ropc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__psimd_x8,
      .element_tile = 8,
    };
    xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
      .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__psimd_2x,
      .channel_tile = 4,
      .row_tile = 2,
    };
  #endif  // XNN_NO_F32_OPERATORS

  /**************************** X32 micro-kernels ****************************/
  #ifndef XNN_NO_X32_OPERATORS
    xnn_params.x32.pad = (struct pad_parameters) {
      .ukernel = xnn_x32_pad_x2__psimd,
      .mr = 2,
    };
    xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
    xnn_params.x32.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__psimd,
      .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__psimd,
      .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__psimd,
      .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__psimd,
    };
  #endif  // XNN_NO_X32_OPERATORS

#elif XNN_ARCH_WASM || XNN_ARCH_ASMJS
  // Unlike most other architectures, on x86/x86-64 when floating-point instructions
  // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
  // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
  // of two infinities (must produce NaN per IEEE 754 standard).
  static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
  const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;

  /**************************** Q8 micro-kernels ****************************/
  #ifndef XNN_NO_Q8_OPERATORS
    xnn_params.q8.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
      .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
      .mr = 2,
      .nr = 2,
    };
    xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
      .cr = 1,
      .mr = 9,
    };
    xnn_params.q8.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
      .mr = 7,
    };
    xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
  #endif  // XNN_NO_Q8_OPERATORS

  /**************************** U8 micro-kernels ****************************/
  #ifndef XNN_NO_U8_OPERATORS
    xnn_params.u8.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
    xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
    xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
  #endif  // XNN_NO_U8_OPERATORS

  /**************************** X8 micro-kernels ****************************/
  #ifndef XNN_NO_X8_OPERATORS
    xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
    xnn_params.x8.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
      .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
      .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
      .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
    };
  #endif  // XNN_NO_X8_OPERATORS

  /**************************** F32 micro-kernels ****************************/
  #ifndef XNN_NO_F32_OPERATORS
    if (is_wasm_x86) {
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_2x4__scalar,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_2x4__scalar,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
        .mr = 2,
        .nr = 4,
      };
    } else {
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x4__scalar,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x4__scalar,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
        .mr = 4,
        .nr = 4,
      };
    }
    xnn_params.f32.gemm2 = (struct gemm_parameters) {
      .gemm = NULL,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__scalar,
      .mr = 4,
      .nr = 2,
    };
    xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x4__scalar_acc2,
      .cr = 1,
      .mr = 4,
    };
    xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x9__scalar_acc2,
      .cr = 1,
      .mr = 9,
    };
    xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x25__scalar_acc2,
      .cr = 1,
      .mr = 25,
    };
    xnn_params.f32.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__scalar,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
      .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__scalar,
      .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__scalar,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__scalar,
      .mr = 7,
    };
    xnn_params.f32.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__scalar,
      .mr = 4,
    };
    xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__scalar,
      .mr = 9,
    };
    xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
      .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.bilinear = (struct bilinear_parameters) {
      .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__scalar_c2,
      .pixel_tile = 1,
      .channel_tile = 2,
    };
    xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__scalar;
    xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__scalar;
    xnn_params.f32.prelu = (struct prelu_parameters) {
      .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__scalar_2x4,
      .row_tile = 4,
      .channel_tile = 4,
    };
    xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__scalar_x4;
    xnn_params.f32.vmul = (struct vbinop_parameters) {
      .op_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmul_ukernel__scalar_x4,
      .opc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__scalar_x4,
      .ropc_ukernel = (xnn_vbinop_ukernel_function) xnn_f32_vmulc_ukernel__scalar_x4,
      .element_tile = 8,
    };
    xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
      .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c1__scalar_2x,
      .channel_tile = 1,
      .row_tile = 2,
    };
    #ifndef XNN_NO_NCHW_OPERATORS
      xnn_params.f32.spmm = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x1__scalar,
        .mr = 8,
        .nr = 1,
      };
      xnn_params.f32.spmm2 = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x2__scalar,
        .mr = 8,
        .nr = 2,
      };
      xnn_params.f32.spmm4 = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x4__scalar,
        .mr = 8,
        .nr = 4,
      };
      xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
        .ukernel_with_symm_padding =
          (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1,
        .output_channel_tile = 4,
        .output_height_tile = 1,
        .output_width_tile = 1,
      };
      xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar,
        .input_width_tile = 1,
        .output_width_tile = 1,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar,
        .input_width_tile = 1,
        .output_width_tile = 1,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_dwconv5x5 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar,
        .input_width_tile = 1,
        .output_width_tile = 1,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_dwconv5x5s2 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar,
        .input_width_tile = 1,
        .output_width_tile = 1,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
        .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__scalar_x1,
        .channel_tile = 1,
      };
    #endif  // XNN_NO_NCHW_OPERATORS
  #endif  // XNN_NO_F32_OPERATORS

  /**************************** X32 micro-kernels ****************************/
  #ifndef XNN_NO_X32_OPERATORS
    xnn_params.x32.pad = (struct pad_parameters) {
      .ukernel = xnn_x32_pad_x2__scalar,
      .mr = 2,
    };
    xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__scalar;
    xnn_params.x32.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__scalar,
      .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__scalar,
      .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__scalar,
      .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__scalar,
    };
  #endif  // XNN_NO_X32_OPERATORS

#else
  #error "Unsupported architecture"
#endif
  xnn_params.initialized = true;
}

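// Initializes cpuinfo (except under Emscripten), runs the one-time microkernel selection,
// and installs the memory allocator. Returns xnn_status_unsupported_hardware if init()
// could not find suitable microkernels for this CPU.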
enum xnn_status xnn_initialize(const struct xnn_allocator* allocator) {
  #ifndef __EMSCRIPTEN__
    if (!cpuinfo_initialize()) {
      return xnn_status_out_of_memory;
    }
  #endif
  pthread_once(&init_guard, &init);
  if (xnn_params.initialized) {
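    // Use the caller-supplied allocator if one was given; otherwise fall back to the
    // default system allocator callbacks.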
    if (allocator != NULL) {
      memcpy(&xnn_params.allocator, allocator, sizeof(struct xnn_allocator));
    } else {
      xnn_params.allocator.allocate = &xnn_allocate;
      xnn_params.allocator.reallocate = &xnn_reallocate;
      xnn_params.allocator.deallocate = &xnn_deallocate;
      xnn_params.allocator.aligned_allocate = &xnn_aligned_allocate;
      xnn_params.allocator.aligned_deallocate = &xnn_aligned_deallocate;
    }
    return xnn_status_success;
  } else {
    return xnn_status_unsupported_hardware;
  }
}

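// Releases the cpuinfo resources acquired in xnn_initialize(); xnn_params itself is left intact.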
enum xnn_status xnn_deinitialize(void) {
  #ifndef __EMSCRIPTEN__
    cpuinfo_deinitialize();
  #endif
  return xnn_status_success;
}