blob: edd57dd845ff71962ba4907b919ca2c80c20d351 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8
9#include <stdbool.h>
10#include <stddef.h>
11#include <stdint.h>
12
13#include <pthread.h>
14
Marat Dukhand343c222019-10-07 09:22:14 -070015#ifndef __EMSCRIPTEN__
16 #include <cpuinfo.h>
17#endif
XNNPACK Teamb455b122019-09-27 18:10:33 -070018
19#include <xnnpack.h>
20#include <xnnpack/argmaxpool.h>
21#include <xnnpack/avgpool.h>
22#include <xnnpack/clamp.h>
Marat Dukhan1dadbf72019-10-01 10:46:20 -070023#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070024#include <xnnpack/conv.h>
25#include <xnnpack/dwconv.h>
26#include <xnnpack/gavgpool.h>
27#include <xnnpack/gemm.h>
28#include <xnnpack/hswish.h>
29#include <xnnpack/igemm.h>
30#include <xnnpack/log.h>
31#include <xnnpack/lut.h>
32#include <xnnpack/maxpool.h>
33#include <xnnpack/pad.h>
34#include <xnnpack/params.h>
35#include <xnnpack/pavgpool.h>
36#include <xnnpack/prelu.h>
37#include <xnnpack/rmax.h>
38#include <xnnpack/spmm.h>
39#include <xnnpack/unpool.h>
40#include <xnnpack/vadd.h>
41#include <xnnpack/vmulcaddc.h>
42#include <xnnpack/zip.h>
43
// Assembly microkernels are enabled by default; a build may pre-define
// XNN_ENABLE_ASSEMBLY=0 to force the C/intrinsics implementations instead
// (see the #if XNN_ENABLE_ASSEMBLY kernel selections inside init()).
#ifndef XNN_ENABLE_ASSEMBLY
  #define XNN_ENABLE_ASSEMBLY 1
#endif
47
// One-shot guard for init() — presumably passed to pthread_once() by the
// public initialization entry point (the call site is not visible in this chunk).
static pthread_once_t init_guard = PTHREAD_ONCE_INIT;
49
// Global dispatch table of microkernel function pointers and their tiling
// parameters, populated per-architecture by init(). Starts with
// .initialized == false so callers can detect a library that was never
// (or unsuccessfully) initialized.
struct xnn_parameters xnn_params = {
  .initialized = false
};
53
Marat Dukhan1dadbf72019-10-01 10:46:20 -070054#if XNN_ARCH_PNACL || XNN_ARCH_ASMJS || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
XNNPACK Teamb455b122019-09-27 18:10:33 -070055 extern uint32_t xnn_stub_wasm_f32_sub(uint32_t a, uint32_t b);
56#endif
Marat Dukhan1dadbf72019-10-01 10:46:20 -070057#if XNN_ARCH_PNACL || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
XNNPACK Teamb455b122019-09-27 18:10:33 -070058 extern uint32_t xnn_stub_wasm_f32_min(uint32_t a, uint32_t b);
59#endif
60
61static void init(void) {
Marat Dukhan1dadbf72019-10-01 10:46:20 -070062#if XNN_ARCH_ARM
XNNPACK Teamb455b122019-09-27 18:10:33 -070063 if (!cpuinfo_has_arm_neon()) {
64 xnn_log_error("XNNPACK initialization failed: NEON is not supported");
65 return;
66 }
67
68 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -070069 #ifndef XNN_NO_Q8_OPERATORS
70 xnn_params.q8.gemm = (struct gemm_parameters) {
71 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x8__neon,
72 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x8__neon,
73 .mr = 4,
74 .nr = 8,
75 };
XNNPACK Teamb455b122019-09-27 18:10:33 -070076
Marat Dukhan8fe54e42019-10-10 14:12:59 -070077 #if XNN_ENABLE_ASSEMBLY
78 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
79 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__aarch32_neon,
80 .cr = 8,
81 .mr = 9,
82 };
83 #else
84 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
85 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
86 .cr = 8,
87 .mr = 9,
88 };
89 #endif
90 xnn_params.q8.avgpool = (struct avgpool_parameters) {
91 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
92 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
93 .mr = 9,
94 .qr = 8,
95 };
96 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
97 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
98 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
99 .mr = 7,
100 };
101 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
102 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700103
104 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700105 #ifndef XNN_NO_U8_OPERATORS
106 xnn_params.u8.maxpool = (struct maxpool_parameters) {
107 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
108 .mr = 9,
109 .qr = 8,
110 };
111 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
112 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
113 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
114 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700115
116 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700117 #ifndef XNN_NO_X8_OPERATORS
118 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
119 xnn_params.x8.zip = (struct zip_parameters) {
120 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
121 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
122 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
123 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
124 };
125 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700126
127 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700128 #ifndef XNN_NO_F32_OPERATORS
129 xnn_params.f32.gemm = (struct gemm_parameters) {
130 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neon_ld128,
131 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neon_ld128,
132 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_ld64,
133 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_ld64,
134 .mr = 4,
135 .nr = 8,
136 };
137 xnn_params.f32.gemm2 = (struct gemm_parameters) {
138 .gemm = NULL,
139 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neon_ld64,
140 .mr = 4,
141 .nr = 2,
142 };
143 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
144 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
145 .cr = 4,
146 .mr = 4,
147 };
148 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
149 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neon,
150 .cr = 4,
151 .mr = 9,
152 };
153 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
154 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
155 .cr = 4,
156 .mr = 25,
157 };
158 xnn_params.f32.avgpool = (struct avgpool_parameters) {
159 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
160 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
161 .mr = 9,
162 .qr = 8,
163 };
164 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
165 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
166 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
167 .mr = 9,
168 .qr = 8,
169 };
170 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
171 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
172 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
173 .mr = 7,
174 };
175 xnn_params.f32.maxpool = (struct maxpool_parameters) {
176 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
177 .mr = 9,
178 .qr = 8,
179 };
180 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
181 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
182 .mr = 4,
183 };
184 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
185 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
186 .mr = 9,
187 };
188 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
189 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
190 .mr = 9,
191 .qr = 8,
192 };
193 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
194 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neon;
195 xnn_params.f32.prelu = (struct prelu_parameters) {
196 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__psimd,
197 .mr = 4,
198 };
199 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
200 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
201 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neon_x2,
202 .cr = 4,
203 .mr = 2,
204 };
205 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700206
207 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700208 #ifndef XNN_NO_X32_OPERATORS
209 xnn_params.x32.pad = (struct pad_parameters) {
210 .ukernel = xnn_x32_pad_x2__neon,
211 .mr = 2,
212 };
213 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
214 xnn_params.x32.zip = (struct zip_parameters) {
215 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
216 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
217 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
218 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
219 };
220 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700221
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700222#elif XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700223
224 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700225 #ifndef XNN_NO_Q8_OPERATORS
226 xnn_params.q8.gemm = (struct gemm_parameters) {
227 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_8x8__neon,
228 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_8x8__neon,
229 .mr = 8,
230 .nr = 8,
231 };
232 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
233 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
234 .cr = 8,
235 .mr = 9,
236 };
237 xnn_params.q8.avgpool = (struct avgpool_parameters) {
238 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
239 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
240 .mr = 9,
241 .qr = 8,
242 };
243 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
244 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
245 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
246 .mr = 7,
247 };
248 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
249 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700250
251 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700252 #ifndef XNN_NO_U8_OPERATORS
253 xnn_params.u8.maxpool = (struct maxpool_parameters) {
254 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
255 .mr = 9,
256 .qr = 8,
257 };
258 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
259 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
260 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
261 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700262
263 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700264 #ifndef XNN_NO_X8_OPERATORS
265 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
266 xnn_params.x8.zip = (struct zip_parameters) {
267 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
268 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
269 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
270 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
271 };
272 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700273
274 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700275 #ifndef XNN_NO_F32_OPERATORS
276 #if XNN_ENABLE_ASSEMBLY
277 switch (cpuinfo_get_core(0)->uarch) {
278 case cpuinfo_uarch_kryo:
279 xnn_params.f32.gemm = (struct gemm_parameters) {
280 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a57,
281 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
282 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
283 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
284 .mr = 4,
285 .nr = 8,
286 };
287 break;
288 case cpuinfo_uarch_cortex_a57:
289 xnn_params.f32.gemm = (struct gemm_parameters) {
290 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
291 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
292 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
293 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
294 .mr = 6,
295 .nr = 8,
296 };
297 break;
298 case cpuinfo_uarch_cortex_a72:
299 case cpuinfo_uarch_cortex_a76:
300 xnn_params.f32.gemm = (struct gemm_parameters) {
301 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
302 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
303 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
304 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
305 .mr = 4,
306 .nr = 8,
307 };
308 break;
309 case cpuinfo_uarch_cortex_a75:
310 case cpuinfo_uarch_mongoose_m1:
311 case cpuinfo_uarch_mongoose_m2:
312 case cpuinfo_uarch_meerkat_m3:
313 case (cpuinfo_uarch_meerkat_m3 + 1):
314 xnn_params.f32.gemm = (struct gemm_parameters) {
315 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
316 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
317 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
318 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
319 .mr = 6,
320 .nr = 8,
321 };
322 break;
323 case cpuinfo_uarch_cortex_a53:
324 case cpuinfo_uarch_cortex_a55:
325 xnn_params.f32.gemm = (struct gemm_parameters) {
326 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x12__aarch64_neonfma_cortex_a53,
327 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x12__aarch64_neonfma_cortex_a53,
328 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x12__aarch64_neonfma_cortex_a53,
329 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x12__aarch64_neonfma_cortex_a53,
330 .mr = 4,
331 .nr = 12,
332 };
333 break;
334 case cpuinfo_uarch_cortex_a73:
335 xnn_params.f32.gemm = (struct gemm_parameters) {
336 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
337 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
338 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
339 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
340 .mr = 6,
341 .nr = 8,
342 };
343 break;
344 default:
345 xnn_params.f32.gemm = (struct gemm_parameters) {
Frank Barchard2af471b2019-10-16 19:10:32 -0700346 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__neonfma_ld64,
347 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__neonfma_ld64,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700348 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
349 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
Frank Barchard2af471b2019-10-16 19:10:32 -0700350 .mr = 6,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700351 .nr = 8,
352 };
353 break;
354 }
355 #else // XNN_ENABLE_ASSEMBLY
XNNPACK Teamb455b122019-09-27 18:10:33 -0700356 xnn_params.f32.gemm = (struct gemm_parameters) {
Frank Barchard2af471b2019-10-16 19:10:32 -0700357 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__neonfma_ld64,
358 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__neonfma_ld64,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700359 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neonfma_ld64,
360 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neonfma_ld64,
Frank Barchard2af471b2019-10-16 19:10:32 -0700361 .mr = 6,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700362 .nr = 8,
363 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700364 #endif
XNNPACK Teamb455b122019-09-27 18:10:33 -0700365
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700366 xnn_params.f32.gemm2 = (struct gemm_parameters) {
367 .gemm = NULL,
368 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neonfma_ld64,
369 .mr = 4,
370 .nr = 2,
371 };
372 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
373 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
374 .cr = 4,
375 .mr = 4,
376 };
377 switch (cpuinfo_get_core(0)->uarch) {
378 case cpuinfo_uarch_kryo:
379 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
380 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neonfma,
381 .cr = 4,
382 .mr = 9,
383 };
384 break;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700385#if XNN_ENABLE_ASSEMBLY
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700386 case cpuinfo_uarch_cortex_a53:
387 case cpuinfo_uarch_cortex_a55:
388 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
389 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55,
390 .cr = 4,
391 .mr = 9,
392 };
393 break;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700394#endif
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700395 default:
396 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
397 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__neonfma,
398 .cr = 8,
399 .mr = 9,
400 };
401 break;
402 }
403 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
404 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
405 .cr = 4,
406 .mr = 25,
407 };
408 xnn_params.f32.avgpool = (struct avgpool_parameters) {
409 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
410 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
411 .mr = 9,
412 .qr = 8,
413 };
414 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
415 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
416 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
417 .mr = 9,
418 .qr = 8,
419 };
420 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
421 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
422 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
423 .mr = 7,
424 };
425 xnn_params.f32.maxpool = (struct maxpool_parameters) {
426 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
427 .mr = 9,
428 .qr = 8,
429 };
430 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
431 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
432 .mr = 4,
433 };
434 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
435 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
436 .mr = 9,
437 };
438 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
439 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
440 .mr = 9,
441 .qr = 8,
442 };
443 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
444 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neonfma;
445 xnn_params.f32.prelu = (struct prelu_parameters) {
446 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__psimd,
447 .mr = 4,
448 };
449 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
450 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
451 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neonfma_x2,
452 .cr = 4,
453 .mr = 2,
454 };
455 #ifndef XNN_NO_SPNCHW_OPERATORS
456 xnn_params.f32.spmm = (struct spmm_parameters) {
Erich Elsen9cdade32019-10-16 05:26:59 -0700457 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x1__neonfma_pipelined,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700458 .mr = 16,
459 .nr = 1,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700460 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700461 xnn_params.f32.spmm2 = (struct spmm_parameters) {
462 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x2__neonfma,
463 .mr = 16,
464 .nr = 2,
465 };
466 xnn_params.f32.spmm4 = (struct spmm_parameters) {
467 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x4__neonfma,
468 .mr = 16,
469 .nr = 4,
470 };
471 xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
472 .ukernel_with_symm_padding =
473 (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2,
474 .output_channel_tile = 4,
475 .output_height_tile = 2,
476 .output_width_tile = 2,
477 };
478 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
479 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma,
480 .input_width_tile = 4,
481 .output_width_tile = 4,
482 .output_height_tile = 3,
483 };
484 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
485 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma,
486 .input_width_tile = 4,
487 .output_width_tile = 4,
488 .output_height_tile = 1,
489 };
490 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
491 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__neon_x4,
492 .channel_tile = 4,
493 };
494 #endif // XNN_NO_SPNCHW_OPERATORS
495 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700496
497 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700498 #ifndef XNN_NO_X32_OPERATORS
499 xnn_params.x32.pad = (struct pad_parameters) {
500 .ukernel = xnn_x32_pad_x2__neon,
501 .mr = 2,
502 };
503 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
504 xnn_params.x32.zip = (struct zip_parameters) {
505 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
506 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
507 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
508 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
509 };
510 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700511
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700512#elif XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700513 if (!cpuinfo_has_x86_sse2()) {
514 xnn_log_error("XNNPACK initialization failed: SSE2 is not supported");
515 return;
516 }
517
518 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700519 #ifndef XNN_NO_Q8_OPERATORS
520 xnn_params.q8.gemm = (struct gemm_parameters) {
521 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x4c2__sse2,
522 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x4c2__sse2,
523 .mr = 4,
524 .nr = 4,
525 .log2_kr = 1,
526 };
527 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
528 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__sse2,
529 .cr = 8,
530 .mr = 9,
531 };
532 xnn_params.q8.avgpool = (struct avgpool_parameters) {
533 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__sse2,
534 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__sse2,
535 .mr = 9,
536 .qr = 8,
537 };
538 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
539 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__sse2,
540 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__sse2,
541 .mr = 7,
542 };
543 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__sse2;
544 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700545
546 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700547 #ifndef XNN_NO_U8_OPERATORS
548 xnn_params.u8.maxpool = (struct maxpool_parameters) {
549 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__sse2,
550 .mr = 9,
551 .qr = 8,
552 };
553 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__sse2;
554 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
555 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__sse2;
556 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700557
558 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700559 #ifndef XNN_NO_X8_OPERATORS
560 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
561 xnn_params.x8.zip = (struct zip_parameters) {
562 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__sse2,
563 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__sse2,
564 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__sse2,
565 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__sse2,
566 };
567 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700568
569 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700570 #ifndef XNN_NO_F32_OPERATORS
571 xnn_params.f32.gemm = (struct gemm_parameters) {
572 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__sse_load1,
573 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__sse_load1,
574 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__sse_load1,
575 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__sse_load1,
576 .mr = 4,
577 .nr = 8,
578 };
579 xnn_params.f32.gemm2 = (struct gemm_parameters) {
580 .gemm = NULL,
581 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__sse,
582 .mr = 4,
583 .nr = 2,
584 .log2_kr = 2,
585 };
586 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
587 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__sse,
588 .cr = 4,
589 .mr = 4,
590 };
591 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
592 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__sse,
593 .cr = 4,
594 .mr = 9,
595 };
596 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
597 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__sse,
598 .cr = 4,
599 .mr = 25,
600 };
601 xnn_params.f32.avgpool = (struct avgpool_parameters) {
602 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__sse,
603 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__sse,
604 .mr = 9,
605 .qr = 8,
606 };
607 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
608 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__sse,
609 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__sse,
610 .mr = 9,
611 .qr = 8,
612 };
613 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
614 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__sse,
615 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__sse,
616 .mr = 7,
617 };
618 xnn_params.f32.maxpool = (struct maxpool_parameters) {
619 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__sse,
620 .mr = 9,
621 .qr = 8,
622 };
623 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
624 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__sse2,
625 .mr = 4,
626 };
627 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
628 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__sse2,
629 .mr = 9,
630 };
631 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
632 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__sse2,
633 .mr = 9,
634 .qr = 8,
635 };
636 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__sse;
637 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__sse;
638 xnn_params.f32.prelu = (struct prelu_parameters) {
639 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__sse,
640 .mr = 4,
641 };
642 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__sse;
643 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
644 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__sse_x2,
645 .cr = 4,
646 .mr = 2,
647 };
648 #ifndef XNN_NO_SPNCHW_OPERATORS
649 xnn_params.f32.spmm = (struct spmm_parameters) {
650 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__sse,
651 .mr = 4,
652 .nr = 1,
653 };
654 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
655 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__sse,
656 .input_width_tile = 4,
657 .output_width_tile = 4,
658 .output_height_tile = 1,
659 };
660 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
661 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse,
662 .input_width_tile = 4,
663 .output_width_tile = 4,
664 .output_height_tile = 1,
665 };
666 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
667 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__sse_x4,
668 .channel_tile = 4,
669 };
670 #endif // XNN_NO_SPNCHW_OPERATORS
671 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700672
673 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700674 #ifndef XNN_NO_X32_OPERATORS
675 xnn_params.x32.pad = (struct pad_parameters) {
676 .ukernel = xnn_x32_pad_x2__sse2,
677 .mr = 2,
678 };
679 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
680 xnn_params.x32.zip = (struct zip_parameters) {
681 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__sse2,
682 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__sse2,
683 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__sse2,
684 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__sse2,
685 };
686 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700687
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700688#elif XNN_ARCH_PNACL || XNN_ARCH_WASMSIMD
Marat Dukhan466b5232019-10-09 11:22:20 -0700689 // Unlike most other architectures, on x86/x86-64 when floating-point instructions
690 // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
691 // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
692 // of two infinities (must produce NaN per IEEE 754 standard).
693 static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
694 const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;
695
XNNPACK Teamb455b122019-09-27 18:10:33 -0700696 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700697 #ifndef XNN_NO_Q8_OPERATORS
698 xnn_params.q8.gemm = (struct gemm_parameters) {
699 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
700 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
701 .mr = 2,
702 .nr = 2,
703 };
704 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
705 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
706 .cr = 1,
707 .mr = 9,
708 };
709 xnn_params.q8.avgpool = (struct avgpool_parameters) {
710 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
711 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
712 .mr = 9,
713 .qr = 8,
714 };
715 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
716 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
717 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
718 .mr = 7,
719 };
720 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
721 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700722
723 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700724 #ifndef XNN_NO_U8_OPERATORS
725 xnn_params.u8.maxpool = (struct maxpool_parameters) {
726 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
727 .mr = 9,
728 .qr = 8,
729 };
730 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
731 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
732 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
733 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700734
735 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700736 #ifndef XNN_NO_X8_OPERATORS
737 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
738 xnn_params.x8.zip = (struct zip_parameters) {
739 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
740 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
741 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
742 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
743 };
744 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700745
746 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700747 #ifndef XNN_NO_F32_OPERATORS
748 if (is_wasm_x86) {
749 xnn_params.f32.gemm = (struct gemm_parameters) {
Marat Dukhancb801972019-10-23 02:10:33 -0700750 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__psimd_splat,
751 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__psimd_splat,
752 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__psimd_splat,
753 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__psimd_splat,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700754 .mr = 4,
755 .nr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700756 };
757 } else {
758 xnn_params.f32.gemm = (struct gemm_parameters) {
Marat Dukhancb801972019-10-23 02:10:33 -0700759 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__psimd_splat,
760 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__psimd_splat,
761 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__psimd_splat,
762 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__psimd_splat,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700763 .mr = 6,
764 .nr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700765 };
766 }
767 xnn_params.f32.gemm2 = (struct gemm_parameters) {
768 .gemm = NULL,
769 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__psimd,
Marat Dukhan466b5232019-10-09 11:22:20 -0700770 .mr = 4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700771 .nr = 2,
772 .log2_kr = 2,
Marat Dukhan466b5232019-10-09 11:22:20 -0700773 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700774 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
775 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
776 .cr = 4,
777 .mr = 4,
Marat Dukhan466b5232019-10-09 11:22:20 -0700778 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700779 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
780 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__psimd,
781 .cr = 4,
782 .mr = 9,
783 };
784 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
785 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
786 .cr = 4,
787 .mr = 25,
788 };
789 xnn_params.f32.avgpool = (struct avgpool_parameters) {
790 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__psimd,
791 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__psimd,
792 .mr = 9,
793 .qr = 8,
794 };
795 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
796 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__psimd,
797 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__psimd,
798 .mr = 9,
799 .qr = 8,
800 };
801 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
802 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__psimd,
803 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__psimd,
804 .mr = 7,
805 };
806 xnn_params.f32.maxpool = (struct maxpool_parameters) {
807 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
808 .mr = 9,
809 .qr = 8,
810 };
811 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
812 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
813 .mr = 4,
814 };
815 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
816 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
817 .mr = 9,
818 };
819 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
820 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
821 .mr = 9,
822 .qr = 8,
823 };
824 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__psimd;
825 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__psimd;
826 xnn_params.f32.prelu = (struct prelu_parameters) {
827 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__psimd,
828 .mr = 4,
829 };
830 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
831 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
832 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__psimd_x2,
833 .cr = 4,
834 .mr = 2,
835 };
836 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700837
838 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700839 #ifndef XNN_NO_X32_OPERATORS
840 xnn_params.x32.pad = (struct pad_parameters) {
841 .ukernel = xnn_x32_pad_x2__psimd,
842 .mr = 2,
843 };
844 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
845 xnn_params.x32.zip = (struct zip_parameters) {
846 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__psimd,
847 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__psimd,
848 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__psimd,
849 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__psimd,
850 };
851 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700852
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700853#elif XNN_ARCH_WASM || XNN_ARCH_ASMJS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700854 // Unlike most other architectures, on x86/x86-64 when floating-point instructions
855 // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
856 // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
857 // of two infinities (must produce NaN per IEEE 754 standard).
858 static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
859 const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;
860
861 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700862 #ifndef XNN_NO_Q8_OPERATORS
863 xnn_params.q8.gemm = (struct gemm_parameters) {
864 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
865 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
866 .mr = 2,
867 .nr = 2,
868 };
869 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
870 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
871 .cr = 1,
872 .mr = 9,
873 };
874 xnn_params.q8.avgpool = (struct avgpool_parameters) {
875 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
876 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
877 .mr = 9,
878 .qr = 8,
879 };
880 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
881 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
882 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
883 .mr = 7,
884 };
885 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
886 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700887
888 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700889 #ifndef XNN_NO_U8_OPERATORS
890 xnn_params.u8.maxpool = (struct maxpool_parameters) {
891 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
892 .mr = 9,
893 .qr = 8,
894 };
895 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
896 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
897 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
898 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700899
900 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700901 #ifndef XNN_NO_X8_OPERATORS
902 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
903 xnn_params.x8.zip = (struct zip_parameters) {
904 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
905 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
906 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
907 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
908 };
909 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700910
911 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700912 #ifndef XNN_NO_F32_OPERATORS
913 if (is_wasm_x86) {
914 xnn_params.f32.gemm = (struct gemm_parameters) {
915 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_2x4__scalar,
916 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_2x4__scalar,
917 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
918 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
919 .mr = 2,
920 .nr = 4,
921 };
922 } else {
923 xnn_params.f32.gemm = (struct gemm_parameters) {
924 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x4__scalar,
925 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x4__scalar,
926 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
927 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
928 .mr = 4,
929 .nr = 4,
930 };
931 }
932 xnn_params.f32.gemm2 = (struct gemm_parameters) {
933 .gemm = NULL,
934 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__scalar,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700935 .mr = 4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700936 .nr = 2,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700937 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700938 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
939 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x4__scalar,
940 .cr = 1,
941 .mr = 4,
942 };
943 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
944 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x9__scalar,
945 .cr = 1,
946 .mr = 9,
947 };
948 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
949 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x25__scalar,
950 .cr = 1,
951 .mr = 25,
952 };
953 xnn_params.f32.avgpool = (struct avgpool_parameters) {
954 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__scalar,
955 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__scalar,
956 .mr = 9,
957 .qr = 8,
958 };
959 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
960 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__scalar,
961 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__scalar,
962 .mr = 9,
963 .qr = 8,
964 };
965 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
966 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__scalar,
967 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__scalar,
968 .mr = 7,
969 };
970 xnn_params.f32.maxpool = (struct maxpool_parameters) {
971 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__scalar,
972 .mr = 9,
973 .qr = 8,
974 };
975 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
976 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__scalar,
977 .mr = 4,
978 };
979 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
980 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__scalar,
981 .mr = 9,
982 };
983 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
984 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__scalar,
985 .mr = 9,
986 .qr = 8,
987 };
988 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__scalar;
989 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__scalar;
990 xnn_params.f32.prelu = (struct prelu_parameters) {
991 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__scalar,
992 .mr = 4,
993 };
994 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__scalar;
995 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
996 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c1__scalar_x2,
997 .cr = 1,
998 .mr = 2,
999 };
1000 #ifndef XNN_NO_SPNCHW_OPERATORS
1001 xnn_params.f32.spmm = (struct spmm_parameters) {
1002 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__scalar,
1003 .mr = 4,
1004 .nr = 1,
1005 };
1006 #endif // XNN_NO_SPNCHW_OPERATORS
1007 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001008
1009 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001010 #ifndef XNN_NO_X32_OPERATORS
1011 xnn_params.x32.pad = (struct pad_parameters) {
1012 .ukernel = xnn_x32_pad_x2__scalar,
1013 .mr = 2,
1014 };
1015 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__scalar;
1016 xnn_params.x32.zip = (struct zip_parameters) {
1017 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__scalar,
1018 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__scalar,
1019 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__scalar,
1020 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__scalar,
1021 };
1022 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001023
1024#else
1025 #error "Unsupported architecture"
1026#endif
1027 xnn_params.initialized = true;
1028}
1029
1030enum xnn_status xnn_initialize(void) {
Marat Dukhand343c222019-10-07 09:22:14 -07001031 #ifndef __EMSCRIPTEN__
1032 if (!cpuinfo_initialize()) {
1033 return xnn_status_out_of_memory;
1034 }
1035 #endif
XNNPACK Teamb455b122019-09-27 18:10:33 -07001036 pthread_once(&init_guard, &init);
1037 if (xnn_params.initialized) {
1038 return xnn_status_success;
1039 } else {
1040 return xnn_status_unsupported_hardware;
1041 }
1042}
1043
1044enum xnn_status xnn_deinitialize(void) {
Marat Dukhand343c222019-10-07 09:22:14 -07001045 #ifndef __EMSCRIPTEN__
1046 cpuinfo_deinitialize();
1047 #endif
XNNPACK Teamb455b122019-09-27 18:10:33 -07001048 return xnn_status_success;
1049}