blob: b93c0b09de2b421ca9b09f48f75b87e805e3233c [file] [log] [blame]
XNNPACK Teamb455b122019-09-27 18:10:33 -07001// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2019 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8
9#include <stdbool.h>
10#include <stddef.h>
11#include <stdint.h>
12
13#include <pthread.h>
14
Marat Dukhand343c222019-10-07 09:22:14 -070015#ifndef __EMSCRIPTEN__
16 #include <cpuinfo.h>
17#endif
XNNPACK Teamb455b122019-09-27 18:10:33 -070018
19#include <xnnpack.h>
20#include <xnnpack/argmaxpool.h>
21#include <xnnpack/avgpool.h>
Marat Dukhan69722492019-11-11 19:55:50 -080022#include <xnnpack/bilinear.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070023#include <xnnpack/clamp.h>
Marat Dukhan1dadbf72019-10-01 10:46:20 -070024#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070025#include <xnnpack/conv.h>
26#include <xnnpack/dwconv.h>
27#include <xnnpack/gavgpool.h>
28#include <xnnpack/gemm.h>
29#include <xnnpack/hswish.h>
30#include <xnnpack/igemm.h>
31#include <xnnpack/log.h>
32#include <xnnpack/lut.h>
33#include <xnnpack/maxpool.h>
34#include <xnnpack/pad.h>
35#include <xnnpack/params.h>
36#include <xnnpack/pavgpool.h>
37#include <xnnpack/prelu.h>
38#include <xnnpack/rmax.h>
39#include <xnnpack/spmm.h>
40#include <xnnpack/unpool.h>
41#include <xnnpack/vadd.h>
Marat Dukhanc07cb7f2019-11-14 15:32:05 -080042#include <xnnpack/vbinop.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070043#include <xnnpack/vmulcaddc.h>
44#include <xnnpack/zip.h>
45
// Build-time switch: defaults to 1 when the build system has not defined it.
// init() tests it to choose between hand-written assembly micro-kernels
// (the *__aarch32_* / *__aarch64_* symbols) and their intrinsics-based fallbacks.
46#ifndef XNN_ENABLE_ASSEMBLY
47 #define XNN_ENABLE_ASSEMBLY 1
48#endif
49
// pthread once-control object, statically initialized with PTHREAD_ONCE_INIT.
// NOTE(review): presumably passed to pthread_once() so that init() runs exactly
// once per process; the call site is not visible in this part of the file -- confirm.
50static pthread_once_t init_guard = PTHREAD_ONCE_INIT;
51
// Global table of micro-kernel function pointers and tile sizes.
// Only `.initialized` is set statically (to false); init() below fills in the
// per-datatype sub-tables (q8, u8, x8, f32, x32) for the target architecture.
// NOTE(review): the code that sets `.initialized` to true is not visible in
// this part of the file.
52struct xnn_parameters xnn_params = {
53 .initialized = false
54};
55
// Externally defined stubs, declared only for the web targets
// (PNaCl / asm.js / WebAssembly / WebAssembly SIMD).
//
// xnn_stub_wasm_f32_sub: init() calls it with the bit pattern of -infinity for
// both arguments and inspects the sign bit of the returned value to tell whether
// the host is x86/x86-64 (see the `is_wasm_x86` computation in init()).
56#if XNN_ARCH_PNACL || XNN_ARCH_ASMJS || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
XNNPACK Teamb455b122019-09-27 18:10:33 -070057 extern uint32_t xnn_stub_wasm_f32_sub(uint32_t a, uint32_t b);
58#endif
// xnn_stub_wasm_f32_min: note the guard does not include XNN_ARCH_ASMJS, unlike
// the declaration above.
// NOTE(review): its use is not visible in this part of the file -- confirm purpose.
Marat Dukhan1dadbf72019-10-01 10:46:20 -070059#if XNN_ARCH_PNACL || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
XNNPACK Teamb455b122019-09-27 18:10:33 -070060 extern uint32_t xnn_stub_wasm_f32_min(uint32_t a, uint32_t b);
61#endif
62
63static void init(void) {
Marat Dukhan1dadbf72019-10-01 10:46:20 -070064#if XNN_ARCH_ARM
XNNPACK Teamb455b122019-09-27 18:10:33 -070065 if (!cpuinfo_has_arm_neon()) {
66 xnn_log_error("XNNPACK initialization failed: NEON is not supported");
67 return;
68 }
69
70 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -070071 #ifndef XNN_NO_Q8_OPERATORS
72 xnn_params.q8.gemm = (struct gemm_parameters) {
73 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x8__neon,
74 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x8__neon,
75 .mr = 4,
76 .nr = 8,
77 };
XNNPACK Teamb455b122019-09-27 18:10:33 -070078
Marat Dukhan8fe54e42019-10-10 14:12:59 -070079 #if XNN_ENABLE_ASSEMBLY
80 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
81 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__aarch32_neon,
82 .cr = 8,
83 .mr = 9,
84 };
85 #else
86 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
87 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
88 .cr = 8,
89 .mr = 9,
90 };
91 #endif
92 xnn_params.q8.avgpool = (struct avgpool_parameters) {
93 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
94 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
95 .mr = 9,
96 .qr = 8,
97 };
98 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
99 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
100 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
101 .mr = 7,
102 };
103 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
104 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700105
106 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700107 #ifndef XNN_NO_U8_OPERATORS
108 xnn_params.u8.maxpool = (struct maxpool_parameters) {
109 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
110 .mr = 9,
111 .qr = 8,
112 };
113 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
114 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
115 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
116 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700117
118 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700119 #ifndef XNN_NO_X8_OPERATORS
120 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
121 xnn_params.x8.zip = (struct zip_parameters) {
122 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
123 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
124 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
125 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
126 };
127 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700128
129 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700130 #ifndef XNN_NO_F32_OPERATORS
131 xnn_params.f32.gemm = (struct gemm_parameters) {
132 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neon_ld128,
133 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neon_ld128,
134 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_ld64,
135 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_ld64,
136 .mr = 4,
137 .nr = 8,
138 };
139 xnn_params.f32.gemm2 = (struct gemm_parameters) {
140 .gemm = NULL,
141 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neon_ld64,
142 .mr = 4,
143 .nr = 2,
144 };
145 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
146 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
147 .cr = 4,
148 .mr = 4,
149 };
150 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
151 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neon,
152 .cr = 4,
153 .mr = 9,
154 };
155 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
156 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
157 .cr = 4,
158 .mr = 25,
159 };
160 xnn_params.f32.avgpool = (struct avgpool_parameters) {
161 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
162 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
163 .mr = 9,
164 .qr = 8,
165 };
166 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
167 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
168 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
169 .mr = 9,
170 .qr = 8,
171 };
172 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
173 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
174 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
175 .mr = 7,
176 };
177 xnn_params.f32.maxpool = (struct maxpool_parameters) {
178 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
179 .mr = 9,
180 .qr = 8,
181 };
182 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
183 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
184 .mr = 4,
185 };
186 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
187 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
188 .mr = 9,
189 };
190 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
191 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
192 .mr = 9,
193 .qr = 8,
194 };
Marat Dukhan69722492019-11-11 19:55:50 -0800195 xnn_params.f32.bilinear = (struct bilinear_parameters) {
196 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__neon_c8,
197 .pixel_tile = 1,
198 .channel_tile = 8,
199 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700200 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
201 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neon;
202 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800203 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__neon_2x8,
204 .row_tile = 2,
205 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700206 };
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800207 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__neon_x8;
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700208 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -0800209 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neon_2x,
210 .channel_tile = 4,
211 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700212 };
213 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700214
215 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700216 #ifndef XNN_NO_X32_OPERATORS
217 xnn_params.x32.pad = (struct pad_parameters) {
218 .ukernel = xnn_x32_pad_x2__neon,
219 .mr = 2,
220 };
221 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
222 xnn_params.x32.zip = (struct zip_parameters) {
223 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
224 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
225 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
226 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
227 };
228 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700229
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700230#elif XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700231
232 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700233 #ifndef XNN_NO_Q8_OPERATORS
234 xnn_params.q8.gemm = (struct gemm_parameters) {
235 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_8x8__neon,
236 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_8x8__neon,
237 .mr = 8,
238 .nr = 8,
239 };
240 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
241 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
242 .cr = 8,
243 .mr = 9,
244 };
245 xnn_params.q8.avgpool = (struct avgpool_parameters) {
246 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
247 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
248 .mr = 9,
249 .qr = 8,
250 };
251 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
252 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
253 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
254 .mr = 7,
255 };
256 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
257 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700258
259 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700260 #ifndef XNN_NO_U8_OPERATORS
261 xnn_params.u8.maxpool = (struct maxpool_parameters) {
262 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
263 .mr = 9,
264 .qr = 8,
265 };
266 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
267 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
268 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
269 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700270
271 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700272 #ifndef XNN_NO_X8_OPERATORS
273 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
274 xnn_params.x8.zip = (struct zip_parameters) {
275 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
276 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
277 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
278 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
279 };
280 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700281
282 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700283 #ifndef XNN_NO_F32_OPERATORS
284 #if XNN_ENABLE_ASSEMBLY
285 switch (cpuinfo_get_core(0)->uarch) {
286 case cpuinfo_uarch_kryo:
287 xnn_params.f32.gemm = (struct gemm_parameters) {
288 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a57,
289 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
290 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
291 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
292 .mr = 4,
293 .nr = 8,
294 };
295 break;
296 case cpuinfo_uarch_cortex_a57:
297 xnn_params.f32.gemm = (struct gemm_parameters) {
298 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
299 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
300 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
301 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
302 .mr = 6,
303 .nr = 8,
304 };
305 break;
306 case cpuinfo_uarch_cortex_a72:
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700307 xnn_params.f32.gemm = (struct gemm_parameters) {
308 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
309 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
310 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
311 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
312 .mr = 4,
313 .nr = 8,
314 };
315 break;
316 case cpuinfo_uarch_cortex_a75:
Frank Barchard263bb092019-10-28 15:28:46 -0700317 case cpuinfo_uarch_cortex_a76:
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700318 case cpuinfo_uarch_mongoose_m1:
319 case cpuinfo_uarch_mongoose_m2:
320 case cpuinfo_uarch_meerkat_m3:
321 case (cpuinfo_uarch_meerkat_m3 + 1):
322 xnn_params.f32.gemm = (struct gemm_parameters) {
323 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
324 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
325 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
326 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
327 .mr = 6,
328 .nr = 8,
329 };
330 break;
331 case cpuinfo_uarch_cortex_a53:
332 case cpuinfo_uarch_cortex_a55:
333 xnn_params.f32.gemm = (struct gemm_parameters) {
Frank Barchardbd1d5d92019-10-30 15:53:30 -0700334 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a53,
335 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a53,
336 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a53,
337 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a53,
338 .mr = 6,
339 .nr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700340 };
341 break;
342 case cpuinfo_uarch_cortex_a73:
343 xnn_params.f32.gemm = (struct gemm_parameters) {
344 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
345 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
346 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
347 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
348 .mr = 6,
349 .nr = 8,
350 };
351 break;
352 default:
353 xnn_params.f32.gemm = (struct gemm_parameters) {
Frank Barchard2af471b2019-10-16 19:10:32 -0700354 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__neonfma_ld64,
355 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__neonfma_ld64,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700356 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
357 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
Frank Barchard2af471b2019-10-16 19:10:32 -0700358 .mr = 6,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700359 .nr = 8,
360 };
361 break;
362 }
363 #else // XNN_ENABLE_ASSEMBLY
XNNPACK Teamb455b122019-09-27 18:10:33 -0700364 xnn_params.f32.gemm = (struct gemm_parameters) {
Frank Barchard2af471b2019-10-16 19:10:32 -0700365 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__neonfma_ld64,
366 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__neonfma_ld64,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700367 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neonfma_ld64,
368 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neonfma_ld64,
Frank Barchard2af471b2019-10-16 19:10:32 -0700369 .mr = 6,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700370 .nr = 8,
371 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700372 #endif
XNNPACK Teamb455b122019-09-27 18:10:33 -0700373
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700374 xnn_params.f32.gemm2 = (struct gemm_parameters) {
375 .gemm = NULL,
376 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neonfma_ld64,
377 .mr = 4,
378 .nr = 2,
379 };
380 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
381 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
382 .cr = 4,
383 .mr = 4,
384 };
385 switch (cpuinfo_get_core(0)->uarch) {
386 case cpuinfo_uarch_kryo:
387 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
388 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neonfma,
389 .cr = 4,
390 .mr = 9,
391 };
392 break;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700393#if XNN_ENABLE_ASSEMBLY
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700394 case cpuinfo_uarch_cortex_a53:
395 case cpuinfo_uarch_cortex_a55:
396 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
397 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55,
398 .cr = 4,
399 .mr = 9,
400 };
401 break;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700402#endif
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700403 default:
404 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
405 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__neonfma,
406 .cr = 8,
407 .mr = 9,
408 };
409 break;
410 }
411 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
412 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
413 .cr = 4,
414 .mr = 25,
415 };
416 xnn_params.f32.avgpool = (struct avgpool_parameters) {
417 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
418 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
419 .mr = 9,
420 .qr = 8,
421 };
422 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
423 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
424 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
425 .mr = 9,
426 .qr = 8,
427 };
428 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
429 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
430 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
431 .mr = 7,
432 };
433 xnn_params.f32.maxpool = (struct maxpool_parameters) {
434 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
435 .mr = 9,
436 .qr = 8,
437 };
438 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
439 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
440 .mr = 4,
441 };
442 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
443 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
444 .mr = 9,
445 };
446 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
447 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
448 .mr = 9,
449 .qr = 8,
450 };
Marat Dukhan69722492019-11-11 19:55:50 -0800451 xnn_params.f32.bilinear = (struct bilinear_parameters) {
452 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__neonfma_c8,
453 .pixel_tile = 1,
454 .channel_tile = 8,
455 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700456 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
457 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neonfma;
458 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800459 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__neon_2x8,
460 .row_tile = 2,
461 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700462 };
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800463 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__neon_x8;
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700464 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -0800465 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neonfma_2x,
466 .channel_tile = 4,
467 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700468 };
469 #ifndef XNN_NO_SPNCHW_OPERATORS
470 xnn_params.f32.spmm = (struct spmm_parameters) {
Erich Elsen9cdade32019-10-16 05:26:59 -0700471 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x1__neonfma_pipelined,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700472 .mr = 16,
473 .nr = 1,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700474 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700475 xnn_params.f32.spmm2 = (struct spmm_parameters) {
476 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x2__neonfma,
477 .mr = 16,
478 .nr = 2,
479 };
480 xnn_params.f32.spmm4 = (struct spmm_parameters) {
481 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x4__neonfma,
482 .mr = 16,
483 .nr = 4,
484 };
485 xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
486 .ukernel_with_symm_padding =
487 (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2,
488 .output_channel_tile = 4,
489 .output_height_tile = 2,
490 .output_width_tile = 2,
491 };
492 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
493 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma,
494 .input_width_tile = 4,
495 .output_width_tile = 4,
496 .output_height_tile = 3,
497 };
498 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
499 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma,
500 .input_width_tile = 4,
501 .output_width_tile = 4,
502 .output_height_tile = 1,
503 };
504 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
505 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__neon_x4,
506 .channel_tile = 4,
507 };
508 #endif // XNN_NO_SPNCHW_OPERATORS
509 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700510
511 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700512 #ifndef XNN_NO_X32_OPERATORS
513 xnn_params.x32.pad = (struct pad_parameters) {
514 .ukernel = xnn_x32_pad_x2__neon,
515 .mr = 2,
516 };
517 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
518 xnn_params.x32.zip = (struct zip_parameters) {
519 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
520 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
521 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
522 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
523 };
524 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700525
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700526#elif XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700527 if (!cpuinfo_has_x86_sse2()) {
528 xnn_log_error("XNNPACK initialization failed: SSE2 is not supported");
529 return;
530 }
531
532 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700533 #ifndef XNN_NO_Q8_OPERATORS
534 xnn_params.q8.gemm = (struct gemm_parameters) {
535 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x4c2__sse2,
536 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x4c2__sse2,
537 .mr = 4,
538 .nr = 4,
539 .log2_kr = 1,
540 };
541 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
542 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__sse2,
543 .cr = 8,
544 .mr = 9,
545 };
546 xnn_params.q8.avgpool = (struct avgpool_parameters) {
547 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__sse2,
548 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__sse2,
549 .mr = 9,
550 .qr = 8,
551 };
552 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
553 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__sse2,
554 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__sse2,
555 .mr = 7,
556 };
557 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__sse2;
558 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700559
560 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700561 #ifndef XNN_NO_U8_OPERATORS
562 xnn_params.u8.maxpool = (struct maxpool_parameters) {
563 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__sse2,
564 .mr = 9,
565 .qr = 8,
566 };
567 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__sse2;
568 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
569 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__sse2;
570 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700571
572 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700573 #ifndef XNN_NO_X8_OPERATORS
574 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
575 xnn_params.x8.zip = (struct zip_parameters) {
576 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__sse2,
577 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__sse2,
578 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__sse2,
579 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__sse2,
580 };
581 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700582
583 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700584 #ifndef XNN_NO_F32_OPERATORS
585 xnn_params.f32.gemm = (struct gemm_parameters) {
586 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__sse_load1,
587 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__sse_load1,
588 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__sse_load1,
589 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__sse_load1,
590 .mr = 4,
591 .nr = 8,
592 };
593 xnn_params.f32.gemm2 = (struct gemm_parameters) {
594 .gemm = NULL,
595 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__sse,
596 .mr = 4,
597 .nr = 2,
598 .log2_kr = 2,
599 };
600 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800601 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x4__sse,
602 .cr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700603 .mr = 4,
604 };
605 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800606 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__sse,
607 .cr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700608 .mr = 9,
609 };
610 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800611 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x25__sse,
612 .cr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700613 .mr = 25,
614 };
615 xnn_params.f32.avgpool = (struct avgpool_parameters) {
616 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__sse,
617 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__sse,
618 .mr = 9,
619 .qr = 8,
620 };
621 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
622 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__sse,
623 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__sse,
624 .mr = 9,
625 .qr = 8,
626 };
627 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
628 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__sse,
629 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__sse,
630 .mr = 7,
631 };
632 xnn_params.f32.maxpool = (struct maxpool_parameters) {
633 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__sse,
634 .mr = 9,
635 .qr = 8,
636 };
637 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
638 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__sse2,
639 .mr = 4,
640 };
641 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
642 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__sse2,
643 .mr = 9,
644 };
645 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
646 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__sse2,
647 .mr = 9,
648 .qr = 8,
649 };
Marat Dukhan69722492019-11-11 19:55:50 -0800650 xnn_params.f32.bilinear = (struct bilinear_parameters) {
651 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__sse_c8,
652 .pixel_tile = 1,
653 .channel_tile = 8,
654 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700655 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__sse;
656 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__sse;
657 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800658 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__sse2_2x8,
659 .row_tile = 2,
660 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700661 };
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800662 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__sse_x8;
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700663 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -0800664 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__sse_2x,
665 .channel_tile = 4,
666 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700667 };
668 #ifndef XNN_NO_SPNCHW_OPERATORS
669 xnn_params.f32.spmm = (struct spmm_parameters) {
670 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__sse,
671 .mr = 4,
672 .nr = 1,
673 };
674 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
675 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__sse,
676 .input_width_tile = 4,
677 .output_width_tile = 4,
678 .output_height_tile = 1,
679 };
680 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
681 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse,
682 .input_width_tile = 4,
683 .output_width_tile = 4,
684 .output_height_tile = 1,
685 };
686 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
687 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__sse_x4,
688 .channel_tile = 4,
689 };
690 #endif // XNN_NO_SPNCHW_OPERATORS
691 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700692
693 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700694 #ifndef XNN_NO_X32_OPERATORS
695 xnn_params.x32.pad = (struct pad_parameters) {
696 .ukernel = xnn_x32_pad_x2__sse2,
697 .mr = 2,
698 };
699 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
700 xnn_params.x32.zip = (struct zip_parameters) {
701 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__sse2,
702 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__sse2,
703 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__sse2,
704 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__sse2,
705 };
706 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700707
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700708#elif XNN_ARCH_PNACL || XNN_ARCH_WASMSIMD
Marat Dukhan466b5232019-10-09 11:22:20 -0700709 // Unlike most other architectures, on x86/x86-64 when floating-point instructions
710 // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
711 // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
712 // of two infinities (must produce NaN per IEEE 754 standard).
713 static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
714 const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;
715
XNNPACK Teamb455b122019-09-27 18:10:33 -0700716 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700717 #ifndef XNN_NO_Q8_OPERATORS
718 xnn_params.q8.gemm = (struct gemm_parameters) {
719 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
720 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
721 .mr = 2,
722 .nr = 2,
723 };
724 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
725 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
726 .cr = 1,
727 .mr = 9,
728 };
729 xnn_params.q8.avgpool = (struct avgpool_parameters) {
730 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
731 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
732 .mr = 9,
733 .qr = 8,
734 };
735 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
736 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
737 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
738 .mr = 7,
739 };
740 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
741 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700742
743 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700744 #ifndef XNN_NO_U8_OPERATORS
745 xnn_params.u8.maxpool = (struct maxpool_parameters) {
746 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
747 .mr = 9,
748 .qr = 8,
749 };
750 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
751 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
752 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
753 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700754
755 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700756 #ifndef XNN_NO_X8_OPERATORS
757 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
758 xnn_params.x8.zip = (struct zip_parameters) {
759 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
760 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
761 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
762 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
763 };
764 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700765
766 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700767 #ifndef XNN_NO_F32_OPERATORS
768 if (is_wasm_x86) {
769 xnn_params.f32.gemm = (struct gemm_parameters) {
Marat Dukhancb801972019-10-23 02:10:33 -0700770 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__psimd_splat,
771 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__psimd_splat,
772 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__psimd_splat,
773 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__psimd_splat,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700774 .mr = 4,
775 .nr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700776 };
777 } else {
778 xnn_params.f32.gemm = (struct gemm_parameters) {
Marat Dukhancd945c62019-10-25 11:59:50 -0700779 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8s4__psimd,
780 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8s4__psimd,
781 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__psimd,
782 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__psimd,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700783 .mr = 6,
784 .nr = 8,
Marat Dukhancd945c62019-10-25 11:59:50 -0700785 .log2_sr = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700786 };
787 }
788 xnn_params.f32.gemm2 = (struct gemm_parameters) {
789 .gemm = NULL,
790 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__psimd,
Marat Dukhan466b5232019-10-09 11:22:20 -0700791 .mr = 4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700792 .nr = 2,
793 .log2_kr = 2,
Marat Dukhan466b5232019-10-09 11:22:20 -0700794 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700795 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800796 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700797 .cr = 4,
798 .mr = 4,
Marat Dukhan466b5232019-10-09 11:22:20 -0700799 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700800 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800801 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__psimd_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700802 .cr = 4,
803 .mr = 9,
804 };
805 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800806 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700807 .cr = 4,
808 .mr = 25,
809 };
810 xnn_params.f32.avgpool = (struct avgpool_parameters) {
811 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__psimd,
812 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__psimd,
813 .mr = 9,
814 .qr = 8,
815 };
816 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
817 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__psimd,
818 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__psimd,
819 .mr = 9,
820 .qr = 8,
821 };
822 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
823 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__psimd,
824 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__psimd,
825 .mr = 7,
826 };
827 xnn_params.f32.maxpool = (struct maxpool_parameters) {
828 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
829 .mr = 9,
830 .qr = 8,
831 };
832 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
833 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
834 .mr = 4,
835 };
836 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
837 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
838 .mr = 9,
839 };
840 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
841 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
842 .mr = 9,
843 .qr = 8,
844 };
Marat Dukhan69722492019-11-11 19:55:50 -0800845 xnn_params.f32.bilinear = (struct bilinear_parameters) {
846 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__psimd_c8,
847 .pixel_tile = 1,
848 .channel_tile = 8,
849 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700850 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__psimd;
851 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__psimd;
852 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800853 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__psimd_2x8,
854 .row_tile = 2,
855 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700856 };
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800857 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd_x8;
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700858 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -0800859 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__psimd_2x,
860 .channel_tile = 4,
861 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700862 };
863 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700864
865 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700866 #ifndef XNN_NO_X32_OPERATORS
867 xnn_params.x32.pad = (struct pad_parameters) {
868 .ukernel = xnn_x32_pad_x2__psimd,
869 .mr = 2,
870 };
871 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
872 xnn_params.x32.zip = (struct zip_parameters) {
873 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__psimd,
874 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__psimd,
875 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__psimd,
876 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__psimd,
877 };
878 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700879
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700880#elif XNN_ARCH_WASM || XNN_ARCH_ASMJS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700881 // Unlike most other architectures, on x86/x86-64 when floating-point instructions
882 // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
883 // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
884 // of two infinities (must produce NaN per IEEE 754 standard).
885 static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
886 const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;
887
888 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700889 #ifndef XNN_NO_Q8_OPERATORS
890 xnn_params.q8.gemm = (struct gemm_parameters) {
891 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
892 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
893 .mr = 2,
894 .nr = 2,
895 };
896 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
897 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
898 .cr = 1,
899 .mr = 9,
900 };
901 xnn_params.q8.avgpool = (struct avgpool_parameters) {
902 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
903 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
904 .mr = 9,
905 .qr = 8,
906 };
907 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
908 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
909 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
910 .mr = 7,
911 };
912 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
913 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700914
915 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700916 #ifndef XNN_NO_U8_OPERATORS
917 xnn_params.u8.maxpool = (struct maxpool_parameters) {
918 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
919 .mr = 9,
920 .qr = 8,
921 };
922 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
923 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
924 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
925 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700926
927 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700928 #ifndef XNN_NO_X8_OPERATORS
929 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
930 xnn_params.x8.zip = (struct zip_parameters) {
931 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
932 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
933 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
934 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
935 };
936 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700937
938 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700939 #ifndef XNN_NO_F32_OPERATORS
940 if (is_wasm_x86) {
941 xnn_params.f32.gemm = (struct gemm_parameters) {
942 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_2x4__scalar,
943 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_2x4__scalar,
944 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
945 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
946 .mr = 2,
947 .nr = 4,
948 };
949 } else {
950 xnn_params.f32.gemm = (struct gemm_parameters) {
951 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x4__scalar,
952 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x4__scalar,
953 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
954 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
955 .mr = 4,
956 .nr = 4,
957 };
958 }
959 xnn_params.f32.gemm2 = (struct gemm_parameters) {
960 .gemm = NULL,
961 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__scalar,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700962 .mr = 4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700963 .nr = 2,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700964 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700965 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800966 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x4__scalar_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700967 .cr = 1,
968 .mr = 4,
969 };
970 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800971 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x9__scalar_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700972 .cr = 1,
973 .mr = 9,
974 };
975 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800976 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x25__scalar_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700977 .cr = 1,
978 .mr = 25,
979 };
980 xnn_params.f32.avgpool = (struct avgpool_parameters) {
981 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__scalar,
982 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__scalar,
983 .mr = 9,
984 .qr = 8,
985 };
986 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
987 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__scalar,
988 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__scalar,
989 .mr = 9,
990 .qr = 8,
991 };
992 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
993 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__scalar,
994 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__scalar,
995 .mr = 7,
996 };
997 xnn_params.f32.maxpool = (struct maxpool_parameters) {
998 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__scalar,
999 .mr = 9,
1000 .qr = 8,
1001 };
1002 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
1003 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__scalar,
1004 .mr = 4,
1005 };
1006 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
1007 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__scalar,
1008 .mr = 9,
1009 };
1010 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
1011 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__scalar,
1012 .mr = 9,
1013 .qr = 8,
1014 };
Marat Dukhan69722492019-11-11 19:55:50 -08001015 xnn_params.f32.bilinear = (struct bilinear_parameters) {
1016 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__scalar_c2,
1017 .pixel_tile = 1,
1018 .channel_tile = 2,
1019 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001020 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__scalar;
1021 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__scalar;
1022 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001023 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__scalar_2x4,
1024 .row_tile = 4,
1025 .channel_tile = 4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001026 };
Marat Dukhanc07cb7f2019-11-14 15:32:05 -08001027 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__scalar_x4;
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001028 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -08001029 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c1__scalar_2x,
1030 .channel_tile = 1,
1031 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001032 };
1033 #ifndef XNN_NO_SPNCHW_OPERATORS
1034 xnn_params.f32.spmm = (struct spmm_parameters) {
Marat Dukhanbff791e2019-10-24 11:05:37 -07001035 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x1__scalar,
1036 .mr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001037 .nr = 1,
1038 };
Erich Elsenc6afd9b2019-10-24 16:10:53 -07001039 xnn_params.f32.spmm2 = (struct spmm_parameters) {
1040 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x2__scalar,
1041 .mr = 8,
1042 .nr = 2,
1043 };
1044 xnn_params.f32.spmm4 = (struct spmm_parameters) {
1045 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x4__scalar,
1046 .mr = 8,
1047 .nr = 4,
1048 };
Marat Dukhan14fe0b22019-10-23 21:20:07 -07001049 xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
1050 .ukernel_with_symm_padding =
1051 (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1,
1052 .output_channel_tile = 4,
1053 .output_height_tile = 1,
1054 .output_width_tile = 1,
1055 };
1056 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
1057 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar,
1058 .input_width_tile = 1,
1059 .output_width_tile = 1,
1060 .output_height_tile = 1,
1061 };
1062 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
1063 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar,
1064 .input_width_tile = 1,
1065 .output_width_tile = 1,
1066 .output_height_tile = 1,
1067 };
1068 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
1069 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__scalar_x1,
1070 .channel_tile = 1,
1071 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001072 #endif // XNN_NO_SPNCHW_OPERATORS
1073 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001074
1075 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001076 #ifndef XNN_NO_X32_OPERATORS
1077 xnn_params.x32.pad = (struct pad_parameters) {
1078 .ukernel = xnn_x32_pad_x2__scalar,
1079 .mr = 2,
1080 };
1081 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__scalar;
1082 xnn_params.x32.zip = (struct zip_parameters) {
1083 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__scalar,
1084 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__scalar,
1085 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__scalar,
1086 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__scalar,
1087 };
1088 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001089
1090#else
1091 #error "Unsupported architecture"
1092#endif
1093 xnn_params.initialized = true;
1094}
1095
1096enum xnn_status xnn_initialize(void) {
Marat Dukhand343c222019-10-07 09:22:14 -07001097 #ifndef __EMSCRIPTEN__
1098 if (!cpuinfo_initialize()) {
1099 return xnn_status_out_of_memory;
1100 }
1101 #endif
XNNPACK Teamb455b122019-09-27 18:10:33 -07001102 pthread_once(&init_guard, &init);
1103 if (xnn_params.initialized) {
1104 return xnn_status_success;
1105 } else {
1106 return xnn_status_unsupported_hardware;
1107 }
1108}
1109
1110enum xnn_status xnn_deinitialize(void) {
Marat Dukhand343c222019-10-07 09:22:14 -07001111 #ifndef __EMSCRIPTEN__
1112 cpuinfo_deinitialize();
1113 #endif
XNNPACK Teamb455b122019-09-27 18:10:33 -07001114 return xnn_status_success;
1115}