blob: 43814cc44e4209622df3ed684b41c294fedaa9e5 [file] [log] [blame]
XNNPACK Teamb455b122019-09-27 18:10:33 -07001// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2019 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8
9#include <stdbool.h>
10#include <stddef.h>
11#include <stdint.h>
12
13#include <pthread.h>
14
Marat Dukhand343c222019-10-07 09:22:14 -070015#ifndef __EMSCRIPTEN__
16 #include <cpuinfo.h>
17#endif
XNNPACK Teamb455b122019-09-27 18:10:33 -070018
19#include <xnnpack.h>
20#include <xnnpack/argmaxpool.h>
21#include <xnnpack/avgpool.h>
22#include <xnnpack/clamp.h>
Marat Dukhan1dadbf72019-10-01 10:46:20 -070023#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070024#include <xnnpack/conv.h>
25#include <xnnpack/dwconv.h>
26#include <xnnpack/gavgpool.h>
27#include <xnnpack/gemm.h>
28#include <xnnpack/hswish.h>
29#include <xnnpack/igemm.h>
30#include <xnnpack/log.h>
31#include <xnnpack/lut.h>
32#include <xnnpack/maxpool.h>
33#include <xnnpack/pad.h>
34#include <xnnpack/params.h>
35#include <xnnpack/pavgpool.h>
36#include <xnnpack/prelu.h>
37#include <xnnpack/rmax.h>
38#include <xnnpack/spmm.h>
39#include <xnnpack/unpool.h>
40#include <xnnpack/vadd.h>
41#include <xnnpack/vmulcaddc.h>
42#include <xnnpack/zip.h>
43
// Hand-written assembly micro-kernels are enabled by default; a build may
// pre-define XNN_ENABLE_ASSEMBLY (e.g. to 0) to force the C/intrinsics paths.
#if !defined(XNN_ENABLE_ASSEMBLY)
  #define XNN_ENABLE_ASSEMBLY 1
#endif
47
// One-shot guard ensuring the kernel-table setup in init() runs at most once —
// presumably passed to pthread_once() by the public entry point; the caller is
// outside this chunk (NOTE(review): confirm against xnn_initialize).
static pthread_once_t init_guard = PTHREAD_ONCE_INIT;
49
// Global dispatch table of micro-kernel function pointers and their tile
// parameters (mr/nr/cr/qr), populated per-architecture by init() below.
// Starts with .initialized = false; the flag is raised elsewhere once setup
// completes (the setter is outside this view — confirm).
struct xnn_parameters xnn_params = {
  .initialized = false
};
53
// Out-of-line WAsm float helpers operating on raw IEEE-754 bit patterns.
// init() feeds two negative infinities to xnn_stub_wasm_f32_sub and inspects
// the sign bit of the resulting NaN to detect x86-flavoured hosts (see the
// is_wasm_x86 probe in the PNaCl/WASMSIMD branch). Declared extern presumably
// so the arithmetic is performed by the engine rather than constant-folded at
// compile time — TODO confirm. The f32_min stub's call site is not visible in
// this chunk.
#if XNN_ARCH_PNACL || XNN_ARCH_ASMJS || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
  extern uint32_t xnn_stub_wasm_f32_sub(uint32_t a, uint32_t b);
#endif
#if XNN_ARCH_PNACL || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
  extern uint32_t xnn_stub_wasm_f32_min(uint32_t a, uint32_t b);
#endif
60
61static void init(void) {
Marat Dukhan1dadbf72019-10-01 10:46:20 -070062#if XNN_ARCH_ARM
XNNPACK Teamb455b122019-09-27 18:10:33 -070063 if (!cpuinfo_has_arm_neon()) {
64 xnn_log_error("XNNPACK initialization failed: NEON is not supported");
65 return;
66 }
67
68 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -070069 #ifndef XNN_NO_Q8_OPERATORS
70 xnn_params.q8.gemm = (struct gemm_parameters) {
71 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x8__neon,
72 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x8__neon,
73 .mr = 4,
74 .nr = 8,
75 };
XNNPACK Teamb455b122019-09-27 18:10:33 -070076
Marat Dukhan8fe54e42019-10-10 14:12:59 -070077 #if XNN_ENABLE_ASSEMBLY
78 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
79 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__aarch32_neon,
80 .cr = 8,
81 .mr = 9,
82 };
83 #else
84 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
85 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
86 .cr = 8,
87 .mr = 9,
88 };
89 #endif
90 xnn_params.q8.avgpool = (struct avgpool_parameters) {
91 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
92 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
93 .mr = 9,
94 .qr = 8,
95 };
96 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
97 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
98 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
99 .mr = 7,
100 };
101 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
102 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700103
104 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700105 #ifndef XNN_NO_U8_OPERATORS
106 xnn_params.u8.maxpool = (struct maxpool_parameters) {
107 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
108 .mr = 9,
109 .qr = 8,
110 };
111 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
112 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
113 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
114 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700115
116 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700117 #ifndef XNN_NO_X8_OPERATORS
118 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
119 xnn_params.x8.zip = (struct zip_parameters) {
120 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
121 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
122 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
123 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
124 };
125 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700126
127 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700128 #ifndef XNN_NO_F32_OPERATORS
129 xnn_params.f32.gemm = (struct gemm_parameters) {
130 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neon_ld128,
131 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neon_ld128,
132 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_ld64,
133 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_ld64,
134 .mr = 4,
135 .nr = 8,
136 };
137 xnn_params.f32.gemm2 = (struct gemm_parameters) {
138 .gemm = NULL,
139 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neon_ld64,
140 .mr = 4,
141 .nr = 2,
142 };
143 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
144 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
145 .cr = 4,
146 .mr = 4,
147 };
148 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
149 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neon,
150 .cr = 4,
151 .mr = 9,
152 };
153 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
154 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
155 .cr = 4,
156 .mr = 25,
157 };
158 xnn_params.f32.avgpool = (struct avgpool_parameters) {
159 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
160 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
161 .mr = 9,
162 .qr = 8,
163 };
164 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
165 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
166 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
167 .mr = 9,
168 .qr = 8,
169 };
170 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
171 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
172 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
173 .mr = 7,
174 };
175 xnn_params.f32.maxpool = (struct maxpool_parameters) {
176 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
177 .mr = 9,
178 .qr = 8,
179 };
180 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
181 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
182 .mr = 4,
183 };
184 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
185 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
186 .mr = 9,
187 };
188 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
189 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
190 .mr = 9,
191 .qr = 8,
192 };
193 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
194 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neon;
195 xnn_params.f32.prelu = (struct prelu_parameters) {
196 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__psimd,
197 .mr = 4,
198 };
199 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
200 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
201 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neon_x2,
202 .cr = 4,
203 .mr = 2,
204 };
205 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700206
207 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700208 #ifndef XNN_NO_X32_OPERATORS
209 xnn_params.x32.pad = (struct pad_parameters) {
210 .ukernel = xnn_x32_pad_x2__neon,
211 .mr = 2,
212 };
213 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
214 xnn_params.x32.zip = (struct zip_parameters) {
215 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
216 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
217 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
218 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
219 };
220 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700221
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700222#elif XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700223
224 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700225 #ifndef XNN_NO_Q8_OPERATORS
226 xnn_params.q8.gemm = (struct gemm_parameters) {
227 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_8x8__neon,
228 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_8x8__neon,
229 .mr = 8,
230 .nr = 8,
231 };
232 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
233 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
234 .cr = 8,
235 .mr = 9,
236 };
237 xnn_params.q8.avgpool = (struct avgpool_parameters) {
238 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
239 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
240 .mr = 9,
241 .qr = 8,
242 };
243 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
244 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
245 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
246 .mr = 7,
247 };
248 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
249 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700250
251 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700252 #ifndef XNN_NO_U8_OPERATORS
253 xnn_params.u8.maxpool = (struct maxpool_parameters) {
254 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
255 .mr = 9,
256 .qr = 8,
257 };
258 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
259 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
260 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
261 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700262
263 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700264 #ifndef XNN_NO_X8_OPERATORS
265 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
266 xnn_params.x8.zip = (struct zip_parameters) {
267 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
268 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
269 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
270 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
271 };
272 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700273
274 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700275 #ifndef XNN_NO_F32_OPERATORS
276 #if XNN_ENABLE_ASSEMBLY
277 switch (cpuinfo_get_core(0)->uarch) {
278 case cpuinfo_uarch_kryo:
279 xnn_params.f32.gemm = (struct gemm_parameters) {
280 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a57,
281 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
282 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
283 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
284 .mr = 4,
285 .nr = 8,
286 };
287 break;
288 case cpuinfo_uarch_cortex_a57:
289 xnn_params.f32.gemm = (struct gemm_parameters) {
290 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
291 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
292 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
293 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
294 .mr = 6,
295 .nr = 8,
296 };
297 break;
298 case cpuinfo_uarch_cortex_a72:
299 case cpuinfo_uarch_cortex_a76:
300 xnn_params.f32.gemm = (struct gemm_parameters) {
301 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
302 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
303 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
304 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
305 .mr = 4,
306 .nr = 8,
307 };
308 break;
309 case cpuinfo_uarch_cortex_a75:
310 case cpuinfo_uarch_mongoose_m1:
311 case cpuinfo_uarch_mongoose_m2:
312 case cpuinfo_uarch_meerkat_m3:
313 case (cpuinfo_uarch_meerkat_m3 + 1):
314 xnn_params.f32.gemm = (struct gemm_parameters) {
315 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
316 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
317 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
318 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
319 .mr = 6,
320 .nr = 8,
321 };
322 break;
323 case cpuinfo_uarch_cortex_a53:
324 case cpuinfo_uarch_cortex_a55:
325 xnn_params.f32.gemm = (struct gemm_parameters) {
326 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x12__aarch64_neonfma_cortex_a53,
327 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x12__aarch64_neonfma_cortex_a53,
328 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x12__aarch64_neonfma_cortex_a53,
329 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x12__aarch64_neonfma_cortex_a53,
330 .mr = 4,
331 .nr = 12,
332 };
333 break;
334 case cpuinfo_uarch_cortex_a73:
335 xnn_params.f32.gemm = (struct gemm_parameters) {
336 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
337 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
338 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
339 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
340 .mr = 6,
341 .nr = 8,
342 };
343 break;
344 default:
345 xnn_params.f32.gemm = (struct gemm_parameters) {
346 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neonfma_ld64,
347 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neonfma_ld64,
348 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
349 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
350 .mr = 4,
351 .nr = 8,
352 };
353 break;
354 }
355 #else // XNN_ENABLE_ASSEMBLY
XNNPACK Teamb455b122019-09-27 18:10:33 -0700356 xnn_params.f32.gemm = (struct gemm_parameters) {
357 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neonfma_ld64,
358 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neonfma_ld64,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700359 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neonfma_ld64,
360 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neonfma_ld64,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700361 .mr = 4,
362 .nr = 8,
363 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700364 #endif
XNNPACK Teamb455b122019-09-27 18:10:33 -0700365
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700366 xnn_params.f32.gemm2 = (struct gemm_parameters) {
367 .gemm = NULL,
368 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neonfma_ld64,
369 .mr = 4,
370 .nr = 2,
371 };
372 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
373 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
374 .cr = 4,
375 .mr = 4,
376 };
377 switch (cpuinfo_get_core(0)->uarch) {
378 case cpuinfo_uarch_kryo:
379 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
380 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neonfma,
381 .cr = 4,
382 .mr = 9,
383 };
384 break;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700385#if XNN_ENABLE_ASSEMBLY
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700386 case cpuinfo_uarch_cortex_a53:
387 case cpuinfo_uarch_cortex_a55:
388 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
389 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55,
390 .cr = 4,
391 .mr = 9,
392 };
393 break;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700394#endif
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700395 default:
396 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
397 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__neonfma,
398 .cr = 8,
399 .mr = 9,
400 };
401 break;
402 }
403 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
404 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
405 .cr = 4,
406 .mr = 25,
407 };
408 xnn_params.f32.avgpool = (struct avgpool_parameters) {
409 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
410 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
411 .mr = 9,
412 .qr = 8,
413 };
414 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
415 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
416 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
417 .mr = 9,
418 .qr = 8,
419 };
420 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
421 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
422 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
423 .mr = 7,
424 };
425 xnn_params.f32.maxpool = (struct maxpool_parameters) {
426 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
427 .mr = 9,
428 .qr = 8,
429 };
430 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
431 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
432 .mr = 4,
433 };
434 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
435 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
436 .mr = 9,
437 };
438 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
439 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
440 .mr = 9,
441 .qr = 8,
442 };
443 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
444 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neonfma;
445 xnn_params.f32.prelu = (struct prelu_parameters) {
446 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__psimd,
447 .mr = 4,
448 };
449 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
450 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
451 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neonfma_x2,
452 .cr = 4,
453 .mr = 2,
454 };
455 #ifndef XNN_NO_SPNCHW_OPERATORS
456 xnn_params.f32.spmm = (struct spmm_parameters) {
Erich Elsen9cdade32019-10-16 05:26:59 -0700457 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x1__neonfma_pipelined,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700458 .mr = 16,
459 .nr = 1,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700460 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700461 xnn_params.f32.spmm2 = (struct spmm_parameters) {
462 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x2__neonfma,
463 .mr = 16,
464 .nr = 2,
465 };
466 xnn_params.f32.spmm4 = (struct spmm_parameters) {
467 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x4__neonfma,
468 .mr = 16,
469 .nr = 4,
470 };
471 xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
472 .ukernel_with_symm_padding =
473 (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2,
474 .output_channel_tile = 4,
475 .output_height_tile = 2,
476 .output_width_tile = 2,
477 };
478 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
479 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma,
480 .input_width_tile = 4,
481 .output_width_tile = 4,
482 .output_height_tile = 3,
483 };
484 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
485 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma,
486 .input_width_tile = 4,
487 .output_width_tile = 4,
488 .output_height_tile = 1,
489 };
490 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
491 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__neon_x4,
492 .channel_tile = 4,
493 };
494 #endif // XNN_NO_SPNCHW_OPERATORS
495 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700496
497 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700498 #ifndef XNN_NO_X32_OPERATORS
499 xnn_params.x32.pad = (struct pad_parameters) {
500 .ukernel = xnn_x32_pad_x2__neon,
501 .mr = 2,
502 };
503 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
504 xnn_params.x32.zip = (struct zip_parameters) {
505 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
506 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
507 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
508 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
509 };
510 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700511
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700512#elif XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700513 if (!cpuinfo_has_x86_sse2()) {
514 xnn_log_error("XNNPACK initialization failed: SSE2 is not supported");
515 return;
516 }
517
518 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700519 #ifndef XNN_NO_Q8_OPERATORS
520 xnn_params.q8.gemm = (struct gemm_parameters) {
521 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x4c2__sse2,
522 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x4c2__sse2,
523 .mr = 4,
524 .nr = 4,
525 .log2_kr = 1,
526 };
527 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
528 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__sse2,
529 .cr = 8,
530 .mr = 9,
531 };
532 xnn_params.q8.avgpool = (struct avgpool_parameters) {
533 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__sse2,
534 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__sse2,
535 .mr = 9,
536 .qr = 8,
537 };
538 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
539 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__sse2,
540 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__sse2,
541 .mr = 7,
542 };
543 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__sse2;
544 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700545
546 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700547 #ifndef XNN_NO_U8_OPERATORS
548 xnn_params.u8.maxpool = (struct maxpool_parameters) {
549 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__sse2,
550 .mr = 9,
551 .qr = 8,
552 };
553 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__sse2;
554 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
555 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__sse2;
556 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700557
558 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700559 #ifndef XNN_NO_X8_OPERATORS
560 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
561 xnn_params.x8.zip = (struct zip_parameters) {
562 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__sse2,
563 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__sse2,
564 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__sse2,
565 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__sse2,
566 };
567 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700568
569 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700570 #ifndef XNN_NO_F32_OPERATORS
571 xnn_params.f32.gemm = (struct gemm_parameters) {
572 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__sse_load1,
573 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__sse_load1,
574 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__sse_load1,
575 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__sse_load1,
576 .mr = 4,
577 .nr = 8,
578 };
579 xnn_params.f32.gemm2 = (struct gemm_parameters) {
580 .gemm = NULL,
581 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__sse,
582 .mr = 4,
583 .nr = 2,
584 .log2_kr = 2,
585 };
586 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
587 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__sse,
588 .cr = 4,
589 .mr = 4,
590 };
591 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
592 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__sse,
593 .cr = 4,
594 .mr = 9,
595 };
596 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
597 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__sse,
598 .cr = 4,
599 .mr = 25,
600 };
601 xnn_params.f32.avgpool = (struct avgpool_parameters) {
602 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__sse,
603 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__sse,
604 .mr = 9,
605 .qr = 8,
606 };
607 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
608 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__sse,
609 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__sse,
610 .mr = 9,
611 .qr = 8,
612 };
613 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
614 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__sse,
615 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__sse,
616 .mr = 7,
617 };
618 xnn_params.f32.maxpool = (struct maxpool_parameters) {
619 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__sse,
620 .mr = 9,
621 .qr = 8,
622 };
623 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
624 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__sse2,
625 .mr = 4,
626 };
627 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
628 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__sse2,
629 .mr = 9,
630 };
631 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
632 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__sse2,
633 .mr = 9,
634 .qr = 8,
635 };
636 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__sse;
637 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__sse;
638 xnn_params.f32.prelu = (struct prelu_parameters) {
639 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__sse,
640 .mr = 4,
641 };
642 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__sse;
643 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
644 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__sse_x2,
645 .cr = 4,
646 .mr = 2,
647 };
648 #ifndef XNN_NO_SPNCHW_OPERATORS
649 xnn_params.f32.spmm = (struct spmm_parameters) {
650 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__sse,
651 .mr = 4,
652 .nr = 1,
653 };
654 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
655 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__sse,
656 .input_width_tile = 4,
657 .output_width_tile = 4,
658 .output_height_tile = 1,
659 };
660 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
661 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse,
662 .input_width_tile = 4,
663 .output_width_tile = 4,
664 .output_height_tile = 1,
665 };
666 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
667 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__sse_x4,
668 .channel_tile = 4,
669 };
670 #endif // XNN_NO_SPNCHW_OPERATORS
671 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700672
673 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700674 #ifndef XNN_NO_X32_OPERATORS
675 xnn_params.x32.pad = (struct pad_parameters) {
676 .ukernel = xnn_x32_pad_x2__sse2,
677 .mr = 2,
678 };
679 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
680 xnn_params.x32.zip = (struct zip_parameters) {
681 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__sse2,
682 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__sse2,
683 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__sse2,
684 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__sse2,
685 };
686 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700687
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700688#elif XNN_ARCH_PNACL || XNN_ARCH_WASMSIMD
Marat Dukhan466b5232019-10-09 11:22:20 -0700689 // Unlike most other architectures, on x86/x86-64 when floating-point instructions
690 // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
691 // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
692 // of two infinities (must produce NaN per IEEE 754 standard).
693 static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
694 const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;
695
XNNPACK Teamb455b122019-09-27 18:10:33 -0700696 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700697 #ifndef XNN_NO_Q8_OPERATORS
698 xnn_params.q8.gemm = (struct gemm_parameters) {
699 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
700 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
701 .mr = 2,
702 .nr = 2,
703 };
704 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
705 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
706 .cr = 1,
707 .mr = 9,
708 };
709 xnn_params.q8.avgpool = (struct avgpool_parameters) {
710 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
711 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
712 .mr = 9,
713 .qr = 8,
714 };
715 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
716 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
717 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
718 .mr = 7,
719 };
720 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
721 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700722
723 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700724 #ifndef XNN_NO_U8_OPERATORS
725 xnn_params.u8.maxpool = (struct maxpool_parameters) {
726 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
727 .mr = 9,
728 .qr = 8,
729 };
730 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
731 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
732 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
733 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700734
735 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700736 #ifndef XNN_NO_X8_OPERATORS
737 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
738 xnn_params.x8.zip = (struct zip_parameters) {
739 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
740 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
741 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
742 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
743 };
744 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700745
746 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700747 #ifndef XNN_NO_F32_OPERATORS
748 if (is_wasm_x86) {
749 xnn_params.f32.gemm = (struct gemm_parameters) {
750 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8s4__psimd,
751 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8s4__psimd,
752 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8s4__psimd,
753 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__psimd,
754 .mr = 4,
755 .nr = 8,
756 .log2_sr = 2,
757 };
758 } else {
759 xnn_params.f32.gemm = (struct gemm_parameters) {
760 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8s4__psimd,
761 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8s4__psimd,
762 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__psimd,
763 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__psimd,
764 .mr = 6,
765 .nr = 8,
766 .log2_sr = 2,
767 };
768 }
769 xnn_params.f32.gemm2 = (struct gemm_parameters) {
770 .gemm = NULL,
771 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__psimd,
Marat Dukhan466b5232019-10-09 11:22:20 -0700772 .mr = 4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700773 .nr = 2,
774 .log2_kr = 2,
Marat Dukhan466b5232019-10-09 11:22:20 -0700775 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700776 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
777 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
778 .cr = 4,
779 .mr = 4,
Marat Dukhan466b5232019-10-09 11:22:20 -0700780 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700781 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
782 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__psimd,
783 .cr = 4,
784 .mr = 9,
785 };
786 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
787 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
788 .cr = 4,
789 .mr = 25,
790 };
791 xnn_params.f32.avgpool = (struct avgpool_parameters) {
792 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__psimd,
793 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__psimd,
794 .mr = 9,
795 .qr = 8,
796 };
797 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
798 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__psimd,
799 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__psimd,
800 .mr = 9,
801 .qr = 8,
802 };
803 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
804 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__psimd,
805 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__psimd,
806 .mr = 7,
807 };
808 xnn_params.f32.maxpool = (struct maxpool_parameters) {
809 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
810 .mr = 9,
811 .qr = 8,
812 };
813 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
814 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
815 .mr = 4,
816 };
817 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
818 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
819 .mr = 9,
820 };
821 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
822 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
823 .mr = 9,
824 .qr = 8,
825 };
826 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__psimd;
827 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__psimd;
828 xnn_params.f32.prelu = (struct prelu_parameters) {
829 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__psimd,
830 .mr = 4,
831 };
832 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
833 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
834 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__psimd_x2,
835 .cr = 4,
836 .mr = 2,
837 };
838 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700839
840 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700841 #ifndef XNN_NO_X32_OPERATORS
842 xnn_params.x32.pad = (struct pad_parameters) {
843 .ukernel = xnn_x32_pad_x2__psimd,
844 .mr = 2,
845 };
846 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
847 xnn_params.x32.zip = (struct zip_parameters) {
848 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__psimd,
849 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__psimd,
850 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__psimd,
851 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__psimd,
852 };
853 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700854
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700855#elif XNN_ARCH_WASM || XNN_ARCH_ASMJS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700856 // Unlike most other architectures, on x86/x86-64 when floating-point instructions
857 // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
858 // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
859 // of two infinities (must produce NaN per IEEE 754 standard).
860 static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
861 const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;
862
863 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700864 #ifndef XNN_NO_Q8_OPERATORS
865 xnn_params.q8.gemm = (struct gemm_parameters) {
866 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
867 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
868 .mr = 2,
869 .nr = 2,
870 };
871 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
872 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
873 .cr = 1,
874 .mr = 9,
875 };
876 xnn_params.q8.avgpool = (struct avgpool_parameters) {
877 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
878 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
879 .mr = 9,
880 .qr = 8,
881 };
882 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
883 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
884 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
885 .mr = 7,
886 };
887 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
888 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700889
890 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700891 #ifndef XNN_NO_U8_OPERATORS
892 xnn_params.u8.maxpool = (struct maxpool_parameters) {
893 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
894 .mr = 9,
895 .qr = 8,
896 };
897 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
898 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
899 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
900 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700901
902 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700903 #ifndef XNN_NO_X8_OPERATORS
904 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
905 xnn_params.x8.zip = (struct zip_parameters) {
906 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
907 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
908 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
909 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
910 };
911 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700912
913 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700914 #ifndef XNN_NO_F32_OPERATORS
915 if (is_wasm_x86) {
916 xnn_params.f32.gemm = (struct gemm_parameters) {
917 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_2x4__scalar,
918 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_2x4__scalar,
919 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
920 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
921 .mr = 2,
922 .nr = 4,
923 };
924 } else {
925 xnn_params.f32.gemm = (struct gemm_parameters) {
926 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x4__scalar,
927 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x4__scalar,
928 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
929 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
930 .mr = 4,
931 .nr = 4,
932 };
933 }
934 xnn_params.f32.gemm2 = (struct gemm_parameters) {
935 .gemm = NULL,
936 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__scalar,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700937 .mr = 4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700938 .nr = 2,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700939 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700940 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
941 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x4__scalar,
942 .cr = 1,
943 .mr = 4,
944 };
945 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
946 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x9__scalar,
947 .cr = 1,
948 .mr = 9,
949 };
950 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
951 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x25__scalar,
952 .cr = 1,
953 .mr = 25,
954 };
955 xnn_params.f32.avgpool = (struct avgpool_parameters) {
956 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__scalar,
957 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__scalar,
958 .mr = 9,
959 .qr = 8,
960 };
961 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
962 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__scalar,
963 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__scalar,
964 .mr = 9,
965 .qr = 8,
966 };
967 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
968 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__scalar,
969 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__scalar,
970 .mr = 7,
971 };
972 xnn_params.f32.maxpool = (struct maxpool_parameters) {
973 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__scalar,
974 .mr = 9,
975 .qr = 8,
976 };
977 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
978 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__scalar,
979 .mr = 4,
980 };
981 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
982 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__scalar,
983 .mr = 9,
984 };
985 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
986 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__scalar,
987 .mr = 9,
988 .qr = 8,
989 };
990 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__scalar;
991 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__scalar;
992 xnn_params.f32.prelu = (struct prelu_parameters) {
993 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__scalar,
994 .mr = 4,
995 };
996 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__scalar;
997 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
998 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c1__scalar_x2,
999 .cr = 1,
1000 .mr = 2,
1001 };
1002 #ifndef XNN_NO_SPNCHW_OPERATORS
1003 xnn_params.f32.spmm = (struct spmm_parameters) {
1004 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__scalar,
1005 .mr = 4,
1006 .nr = 1,
1007 };
1008 #endif // XNN_NO_SPNCHW_OPERATORS
1009 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001010
1011 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001012 #ifndef XNN_NO_X32_OPERATORS
1013 xnn_params.x32.pad = (struct pad_parameters) {
1014 .ukernel = xnn_x32_pad_x2__scalar,
1015 .mr = 2,
1016 };
1017 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__scalar;
1018 xnn_params.x32.zip = (struct zip_parameters) {
1019 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__scalar,
1020 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__scalar,
1021 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__scalar,
1022 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__scalar,
1023 };
1024 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001025
1026#else
1027 #error "Unsupported architecture"
1028#endif
1029 xnn_params.initialized = true;
1030}
1031
1032enum xnn_status xnn_initialize(void) {
Marat Dukhand343c222019-10-07 09:22:14 -07001033 #ifndef __EMSCRIPTEN__
1034 if (!cpuinfo_initialize()) {
1035 return xnn_status_out_of_memory;
1036 }
1037 #endif
XNNPACK Teamb455b122019-09-27 18:10:33 -07001038 pthread_once(&init_guard, &init);
1039 if (xnn_params.initialized) {
1040 return xnn_status_success;
1041 } else {
1042 return xnn_status_unsupported_hardware;
1043 }
1044}
1045
// Releases resources acquired by xnn_initialize(). Only cpuinfo is torn
// down, and only on native builds (Emscripten builds never initialized it).
// Always reports success.
enum xnn_status xnn_deinitialize(void) {
  #ifndef __EMSCRIPTEN__
    cpuinfo_deinitialize();
  #endif
  return xnn_status_success;
}