blob: a82716a08a9a87c78a3285e41a60cc51781263fa [file] [log] [blame]
XNNPACK Teamb455b122019-09-27 18:10:33 -07001// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2019 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8
9#include <stdbool.h>
10#include <stddef.h>
11#include <stdint.h>
Marat Dukhan04f03be2019-11-19 12:36:47 -080012#include <string.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070013
14#include <pthread.h>
15
Marat Dukhand343c222019-10-07 09:22:14 -070016#ifndef __EMSCRIPTEN__
17 #include <cpuinfo.h>
18#endif
XNNPACK Teamb455b122019-09-27 18:10:33 -070019
20#include <xnnpack.h>
21#include <xnnpack/argmaxpool.h>
22#include <xnnpack/avgpool.h>
Marat Dukhan69722492019-11-11 19:55:50 -080023#include <xnnpack/bilinear.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070024#include <xnnpack/clamp.h>
Marat Dukhan1dadbf72019-10-01 10:46:20 -070025#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070026#include <xnnpack/conv.h>
27#include <xnnpack/dwconv.h>
28#include <xnnpack/gavgpool.h>
29#include <xnnpack/gemm.h>
30#include <xnnpack/hswish.h>
31#include <xnnpack/igemm.h>
32#include <xnnpack/log.h>
33#include <xnnpack/lut.h>
34#include <xnnpack/maxpool.h>
Marat Dukhan04f03be2019-11-19 12:36:47 -080035#include <xnnpack/memory.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070036#include <xnnpack/pad.h>
37#include <xnnpack/params.h>
38#include <xnnpack/pavgpool.h>
39#include <xnnpack/prelu.h>
40#include <xnnpack/rmax.h>
41#include <xnnpack/spmm.h>
42#include <xnnpack/unpool.h>
43#include <xnnpack/vadd.h>
Marat Dukhan1e782c42019-11-21 17:02:40 -080044#include <xnnpack/vbinary.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070045#include <xnnpack/vmulcaddc.h>
Marat Dukhan1e782c42019-11-21 17:02:40 -080046#include <xnnpack/vunary.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070047#include <xnnpack/zip.h>
48
// Default XNN_ENABLE_ASSEMBLY to 1 when the build has not defined it.
// init() tests this macro with #if to choose between architecture-specific
// kernels (names containing aarch32/aarch64) and the generic NEON ones.
49#ifndef XNN_ENABLE_ASSEMBLY
50 #define XNN_ENABLE_ASSEMBLY 1
51#endif
52
// pthread one-time-initialization control object, statically set to
// PTHREAD_ONCE_INIT.
// NOTE(review): the pthread_once() call that consumes it is outside this
// view -- presumably it guards init(); confirm against the caller.
53static pthread_once_t init_guard = PTHREAD_ONCE_INIT;
54
// Global table of micro-kernel selections. init() fills the per-datatype
// members (q8, u8, x8, f32, x32) with function pointers and tile sizes for
// the architecture being compiled for.
// Starts out with .initialized = false.
// NOTE(review): the code that sets .initialized to true is not visible in
// this chunk; init() returns early without filling the table when the
// required ISA (NEON on ARM, SSE2 on x86) is missing.
55struct xnn_parameters xnn_params = {
56 .initialized = false
57};
58
// Stub functions declared only for the PNaCl / asm.js / WebAssembly targets.
// xnn_stub_wasm_f32_sub is declared for all four of those architecture
// macros; xnn_stub_wasm_f32_min is not declared for XNN_ARCH_ASMJS.
// Both take and return uint32_t values.
// NOTE(review): the call sites are outside this view -- presumably the
// arguments are float bit patterns used for a runtime probe; confirm.
Marat Dukhan1dadbf72019-10-01 10:46:20 -070059#if XNN_ARCH_PNACL || XNN_ARCH_ASMJS || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
XNNPACK Teamb455b122019-09-27 18:10:33 -070060 extern uint32_t xnn_stub_wasm_f32_sub(uint32_t a, uint32_t b);
61#endif
Marat Dukhan1dadbf72019-10-01 10:46:20 -070062#if XNN_ARCH_PNACL || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
XNNPACK Teamb455b122019-09-27 18:10:33 -070063 extern uint32_t xnn_stub_wasm_f32_min(uint32_t a, uint32_t b);
64#endif
65
66static void init(void) {
Marat Dukhan1dadbf72019-10-01 10:46:20 -070067#if XNN_ARCH_ARM
XNNPACK Teamb455b122019-09-27 18:10:33 -070068 if (!cpuinfo_has_arm_neon()) {
69 xnn_log_error("XNNPACK initialization failed: NEON is not supported");
70 return;
71 }
72
73 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -070074 #ifndef XNN_NO_Q8_OPERATORS
75 xnn_params.q8.gemm = (struct gemm_parameters) {
76 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x8__neon,
77 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x8__neon,
78 .mr = 4,
79 .nr = 8,
80 };
XNNPACK Teamb455b122019-09-27 18:10:33 -070081
Marat Dukhan8fe54e42019-10-10 14:12:59 -070082 #if XNN_ENABLE_ASSEMBLY
83 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
84 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__aarch32_neon,
85 .cr = 8,
86 .mr = 9,
87 };
88 #else
89 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
90 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
91 .cr = 8,
92 .mr = 9,
93 };
94 #endif
95 xnn_params.q8.avgpool = (struct avgpool_parameters) {
96 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
97 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
98 .mr = 9,
99 .qr = 8,
100 };
101 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
102 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
103 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
104 .mr = 7,
105 };
106 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
107 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700108
109 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700110 #ifndef XNN_NO_U8_OPERATORS
111 xnn_params.u8.maxpool = (struct maxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800112 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8x__neon_c16,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700113 .mr = 9,
114 .qr = 8,
115 };
116 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
117 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
118 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
119 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700120
121 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700122 #ifndef XNN_NO_X8_OPERATORS
123 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
124 xnn_params.x8.zip = (struct zip_parameters) {
125 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
126 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
127 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
128 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
129 };
130 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700131
132 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700133 #ifndef XNN_NO_F32_OPERATORS
Frank Barchard32670922019-11-30 21:58:51 -0800134 #if XNN_ENABLE_ASSEMBLY
135 xnn_params.f32.gemm = (struct gemm_parameters) {
136 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch32_neon_ld64,
137 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neon_lane_ld128,
138 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_lane_ld64,
139 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_lane_ld64,
140 .mr = 4,
141 .nr = 8,
142 };
143 #else // XNN_ENABLE_ASSEMBLY
144 xnn_params.f32.gemm = (struct gemm_parameters) {
145 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neon_lane_ld128,
146 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neon_lane_ld128,
147 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_lane_ld64,
148 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_lane_ld64,
149 .mr = 4,
150 .nr = 8,
151 };
152 #endif // XNN_ENABLE_ASSEMBLY
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700153 xnn_params.f32.gemm2 = (struct gemm_parameters) {
154 .gemm = NULL,
Frank Barchard91317c52019-11-22 10:54:35 -0800155 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neon_lane_ld64,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700156 .mr = 4,
157 .nr = 2,
158 };
159 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
160 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
161 .cr = 4,
162 .mr = 4,
163 };
164 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
165 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neon,
166 .cr = 4,
167 .mr = 9,
168 };
169 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
170 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
171 .cr = 4,
172 .mr = 25,
173 };
174 xnn_params.f32.avgpool = (struct avgpool_parameters) {
175 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
176 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
177 .mr = 9,
178 .qr = 8,
179 };
180 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
181 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
182 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
183 .mr = 9,
184 .qr = 8,
185 };
186 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
187 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
188 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
189 .mr = 7,
190 };
191 xnn_params.f32.maxpool = (struct maxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800192 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700193 .mr = 9,
194 .qr = 8,
195 };
196 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800197 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_4x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700198 .mr = 4,
199 };
200 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800201 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_9x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700202 .mr = 9,
203 };
204 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800205 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700206 .mr = 9,
207 .qr = 8,
208 };
Marat Dukhan69722492019-11-11 19:55:50 -0800209 xnn_params.f32.bilinear = (struct bilinear_parameters) {
210 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__neon_c8,
211 .pixel_tile = 1,
212 .channel_tile = 8,
213 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700214 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
215 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neon;
216 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800217 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__neon_2x8,
218 .row_tile = 2,
219 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700220 };
Marat Dukhanb1a0fc32019-12-02 19:32:02 -0800221 xnn_params.f32.vadd = (struct vbinary_parameters) {
222 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_ukernel__neon_x8,
223 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__neon_x8,
224 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__neon_x8,
225 .element_tile = 8,
226 };
Marat Dukhan1e782c42019-11-21 17:02:40 -0800227 xnn_params.f32.vmul = (struct vbinary_parameters) {
228 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_ukernel__neon_x8,
229 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
230 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
Marat Dukhanca2733c2019-11-15 23:21:17 -0800231 .element_tile = 8,
232 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700233 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -0800234 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neon_2x,
235 .channel_tile = 4,
236 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700237 };
238 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700239
240 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700241 #ifndef XNN_NO_X32_OPERATORS
242 xnn_params.x32.pad = (struct pad_parameters) {
243 .ukernel = xnn_x32_pad_x2__neon,
244 .mr = 2,
245 };
246 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
247 xnn_params.x32.zip = (struct zip_parameters) {
248 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
249 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
250 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
251 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
252 };
253 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700254
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700255#elif XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700256
257 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700258 #ifndef XNN_NO_Q8_OPERATORS
259 xnn_params.q8.gemm = (struct gemm_parameters) {
260 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_8x8__neon,
261 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_8x8__neon,
262 .mr = 8,
263 .nr = 8,
264 };
265 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
266 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
267 .cr = 8,
268 .mr = 9,
269 };
270 xnn_params.q8.avgpool = (struct avgpool_parameters) {
271 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
272 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
273 .mr = 9,
274 .qr = 8,
275 };
276 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
277 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
278 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
279 .mr = 7,
280 };
281 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
282 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700283
284 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700285 #ifndef XNN_NO_U8_OPERATORS
286 xnn_params.u8.maxpool = (struct maxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800287 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8x__neon_c16,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700288 .mr = 9,
289 .qr = 8,
290 };
291 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
292 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
293 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
294 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700295
296 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700297 #ifndef XNN_NO_X8_OPERATORS
298 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
299 xnn_params.x8.zip = (struct zip_parameters) {
300 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
301 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
302 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
303 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
304 };
305 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700306
307 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700308 #ifndef XNN_NO_F32_OPERATORS
309 #if XNN_ENABLE_ASSEMBLY
310 switch (cpuinfo_get_core(0)->uarch) {
311 case cpuinfo_uarch_kryo:
312 xnn_params.f32.gemm = (struct gemm_parameters) {
313 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a57,
314 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
315 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
316 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
317 .mr = 4,
318 .nr = 8,
319 };
320 break;
321 case cpuinfo_uarch_cortex_a57:
322 xnn_params.f32.gemm = (struct gemm_parameters) {
323 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
324 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
325 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
326 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
327 .mr = 6,
328 .nr = 8,
329 };
330 break;
331 case cpuinfo_uarch_cortex_a72:
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700332 xnn_params.f32.gemm = (struct gemm_parameters) {
333 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
334 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
335 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
336 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
337 .mr = 4,
338 .nr = 8,
339 };
340 break;
341 case cpuinfo_uarch_cortex_a75:
Frank Barchard263bb092019-10-28 15:28:46 -0700342 case cpuinfo_uarch_cortex_a76:
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700343 case cpuinfo_uarch_meerkat_m3:
344 case (cpuinfo_uarch_meerkat_m3 + 1):
345 xnn_params.f32.gemm = (struct gemm_parameters) {
346 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
347 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
348 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
349 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
350 .mr = 6,
351 .nr = 8,
352 };
353 break;
Frank Barcharddf06d802019-11-20 15:53:46 -0800354
355 case cpuinfo_uarch_mongoose_m1:
356 case cpuinfo_uarch_mongoose_m2:
357 xnn_params.f32.gemm = (struct gemm_parameters) {
358 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8s4__neonfma,
359 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8s4__neonfma,
360 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8s4__neonfma,
361 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__neonfma,
362 .mr = 6,
363 .nr = 8,
364 .log2_sr = 2,
365 };
366 break;
367
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700368 case cpuinfo_uarch_cortex_a53:
369 case cpuinfo_uarch_cortex_a55:
370 xnn_params.f32.gemm = (struct gemm_parameters) {
Frank Barchardbd1d5d92019-10-30 15:53:30 -0700371 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a53,
372 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a53,
373 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a53,
374 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a53,
375 .mr = 6,
376 .nr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700377 };
378 break;
379 case cpuinfo_uarch_cortex_a73:
380 xnn_params.f32.gemm = (struct gemm_parameters) {
381 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
382 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
383 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
384 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
385 .mr = 6,
386 .nr = 8,
387 };
388 break;
389 default:
390 xnn_params.f32.gemm = (struct gemm_parameters) {
Frank Barchard91317c52019-11-22 10:54:35 -0800391 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__neonfma_lane_ld64,
392 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__neonfma_lane_ld64,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700393 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
394 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
Frank Barchard2af471b2019-10-16 19:10:32 -0700395 .mr = 6,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700396 .nr = 8,
397 };
398 break;
399 }
400 #else // XNN_ENABLE_ASSEMBLY
XNNPACK Teamb455b122019-09-27 18:10:33 -0700401 xnn_params.f32.gemm = (struct gemm_parameters) {
Frank Barchard91317c52019-11-22 10:54:35 -0800402 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__neonfma_lane_ld64,
403 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__neonfma_lane_ld64,
404 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neonfma_lane_ld64,
405 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neonfma_lane_ld64,
Frank Barchard2af471b2019-10-16 19:10:32 -0700406 .mr = 6,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700407 .nr = 8,
408 };
Frank Barchard32670922019-11-30 21:58:51 -0800409 #endif // XNN_ENABLE_ASSEMBLY
XNNPACK Teamb455b122019-09-27 18:10:33 -0700410
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700411 xnn_params.f32.gemm2 = (struct gemm_parameters) {
412 .gemm = NULL,
Frank Barchard91317c52019-11-22 10:54:35 -0800413 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700414 .mr = 4,
415 .nr = 2,
416 };
417 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
418 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
419 .cr = 4,
420 .mr = 4,
421 };
422 switch (cpuinfo_get_core(0)->uarch) {
423 case cpuinfo_uarch_kryo:
424 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
425 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neonfma,
426 .cr = 4,
427 .mr = 9,
428 };
429 break;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700430#if XNN_ENABLE_ASSEMBLY
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700431 case cpuinfo_uarch_cortex_a53:
432 case cpuinfo_uarch_cortex_a55:
433 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
434 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55,
435 .cr = 4,
436 .mr = 9,
437 };
438 break;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700439#endif
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700440 default:
441 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
442 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__neonfma,
443 .cr = 8,
444 .mr = 9,
445 };
446 break;
447 }
448 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
449 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
450 .cr = 4,
451 .mr = 25,
452 };
453 xnn_params.f32.avgpool = (struct avgpool_parameters) {
454 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
455 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
456 .mr = 9,
457 .qr = 8,
458 };
459 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
460 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
461 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
462 .mr = 9,
463 .qr = 8,
464 };
465 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
466 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
467 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
468 .mr = 7,
469 };
470 xnn_params.f32.maxpool = (struct maxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800471 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700472 .mr = 9,
473 .qr = 8,
474 };
475 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800476 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_4x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700477 .mr = 4,
478 };
479 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800480 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_9x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700481 .mr = 9,
482 };
483 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800484 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700485 .mr = 9,
486 .qr = 8,
487 };
Marat Dukhan69722492019-11-11 19:55:50 -0800488 xnn_params.f32.bilinear = (struct bilinear_parameters) {
489 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__neonfma_c8,
490 .pixel_tile = 1,
491 .channel_tile = 8,
492 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700493 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
494 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neonfma;
Marat Dukhan14bec502019-11-18 11:35:31 -0800495 xnn_params.f32.sigmoid = (xnn_univector_ukernel_function) xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16;
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700496 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800497 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__neon_2x8,
498 .row_tile = 2,
499 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700500 };
Marat Dukhanb1a0fc32019-12-02 19:32:02 -0800501 xnn_params.f32.vadd = (struct vbinary_parameters) {
502 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_ukernel__neon_x8,
503 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__neon_x8,
504 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__neon_x8,
505 .element_tile = 8,
506 };
Marat Dukhan1e782c42019-11-21 17:02:40 -0800507 xnn_params.f32.vmul = (struct vbinary_parameters) {
508 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_ukernel__neon_x8,
509 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
510 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
Marat Dukhanca2733c2019-11-15 23:21:17 -0800511 .element_tile = 8,
512 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700513 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -0800514 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neonfma_2x,
515 .channel_tile = 4,
516 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700517 };
Marat Dukhanefc47b82019-11-18 09:25:38 -0800518 #ifndef XNN_NO_NCHW_OPERATORS
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700519 xnn_params.f32.spmm = (struct spmm_parameters) {
Erich Elsen9cdade32019-10-16 05:26:59 -0700520 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x1__neonfma_pipelined,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700521 .mr = 16,
522 .nr = 1,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700523 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700524 xnn_params.f32.spmm2 = (struct spmm_parameters) {
525 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x2__neonfma,
526 .mr = 16,
527 .nr = 2,
528 };
529 xnn_params.f32.spmm4 = (struct spmm_parameters) {
530 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x4__neonfma,
531 .mr = 16,
532 .nr = 4,
533 };
534 xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
535 .ukernel_with_symm_padding =
536 (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2,
537 .output_channel_tile = 4,
538 .output_height_tile = 2,
539 .output_width_tile = 2,
540 };
541 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
542 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma,
543 .input_width_tile = 4,
544 .output_width_tile = 4,
545 .output_height_tile = 3,
546 };
547 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
548 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma,
549 .input_width_tile = 4,
550 .output_width_tile = 4,
551 .output_height_tile = 1,
552 };
Marat Dukhana99918a2019-11-15 14:40:12 -0800553 xnn_params.f32.spchw_dwconv5x5 = (struct spchw_dwconv_parameters) {
554 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma,
555 .input_width_tile = 4,
556 .output_width_tile = 4,
Erich Elsen4ad51152019-11-19 13:11:53 -0800557 .output_height_tile = 3,
Marat Dukhana99918a2019-11-15 14:40:12 -0800558 };
559 xnn_params.f32.spchw_dwconv5x5s2 = (struct spchw_dwconv_parameters) {
560 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma,
561 .input_width_tile = 4,
562 .output_width_tile = 4,
563 .output_height_tile = 1,
564 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700565 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
566 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__neon_x4,
567 .channel_tile = 4,
568 };
Marat Dukhanefc47b82019-11-18 09:25:38 -0800569 #endif // XNN_NO_NCHW_OPERATORS
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700570 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700571
572 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700573 #ifndef XNN_NO_X32_OPERATORS
574 xnn_params.x32.pad = (struct pad_parameters) {
575 .ukernel = xnn_x32_pad_x2__neon,
576 .mr = 2,
577 };
578 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
579 xnn_params.x32.zip = (struct zip_parameters) {
580 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
581 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
582 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
583 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
584 };
585 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700586
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700587#elif XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700588 if (!cpuinfo_has_x86_sse2()) {
589 xnn_log_error("XNNPACK initialization failed: SSE2 is not supported");
590 return;
591 }
592
593 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700594 #ifndef XNN_NO_Q8_OPERATORS
595 xnn_params.q8.gemm = (struct gemm_parameters) {
596 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x4c2__sse2,
597 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x4c2__sse2,
598 .mr = 4,
599 .nr = 4,
600 .log2_kr = 1,
601 };
602 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
603 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__sse2,
604 .cr = 8,
605 .mr = 9,
606 };
607 xnn_params.q8.avgpool = (struct avgpool_parameters) {
608 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__sse2,
609 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__sse2,
610 .mr = 9,
611 .qr = 8,
612 };
613 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
614 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__sse2,
615 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__sse2,
616 .mr = 7,
617 };
618 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__sse2;
619 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700620
621 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700622 #ifndef XNN_NO_U8_OPERATORS
623 xnn_params.u8.maxpool = (struct maxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800624 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8x__sse2_c16,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700625 .mr = 9,
626 .qr = 8,
627 };
628 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__sse2;
629 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
630 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__sse2;
631 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700632
633 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700634 #ifndef XNN_NO_X8_OPERATORS
635 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
636 xnn_params.x8.zip = (struct zip_parameters) {
637 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__sse2,
638 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__sse2,
639 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__sse2,
640 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__sse2,
641 };
642 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700643
644 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700645 #ifndef XNN_NO_F32_OPERATORS
Marat Dukhan0f349c42019-11-27 11:58:54 -0800646 if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {
647 xnn_params.f32.gemm = (struct gemm_parameters) {
648 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_7x16__avx512f_broadcast,
649 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_7x16__avx512f_broadcast,
650 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x16__avx512f_broadcast,
651 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x16__avx512f_broadcast,
652 .mr = 7,
653 .nr = 16,
654 };
655 } else if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_fma3()) {
Marat Dukhan1025ea32019-11-21 16:01:08 -0800656 xnn_params.f32.gemm = (struct gemm_parameters) {
657 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_7x8__fma3_broadcast,
658 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_7x8__fma3_broadcast,
659 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__fma3_broadcast,
660 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__fma3_broadcast,
661 .mr = 7,
662 .nr = 8,
663 };
664 } else if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx()) {
665 xnn_params.f32.gemm = (struct gemm_parameters) {
666 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_7x8__avx_broadcast,
667 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_7x8__avx_broadcast,
668 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__avx_broadcast,
669 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__avx_broadcast,
670 .mr = 7,
671 .nr = 8,
672 };
673 } else {
674 xnn_params.f32.gemm = (struct gemm_parameters) {
675 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__sse_load1,
676 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__sse_load1,
677 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__sse_load1,
678 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__sse_load1,
679 .mr = 4,
680 .nr = 8,
681 };
682 }
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700683 xnn_params.f32.gemm2 = (struct gemm_parameters) {
684 .gemm = NULL,
685 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__sse,
686 .mr = 4,
687 .nr = 2,
688 .log2_kr = 2,
689 };
Marat Dukhan479f87e2019-11-27 15:17:06 -0800690 if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {
691 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
692 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up16x4__avx512f,
693 .cr = 16,
694 .mr = 4,
695 };
696 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
697 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up16x9__avx512f,
698 .cr = 16,
699 .mr = 9,
700 };
701 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
702 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up16x25__avx512f,
703 .cr = 16,
704 .mr = 25,
705 };
706 } else if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_fma3()) {
Marat Dukhan17ec5f32019-11-22 13:34:16 -0800707 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
708 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up16x4__fma3,
709 .cr = 16,
710 .mr = 4,
711 };
712 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
713 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up16x9__fma3,
714 .cr = 16,
715 .mr = 9,
716 };
717 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
718 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x25__fma3,
719 .cr = 8,
720 .mr = 25,
721 };
722 } else if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx()) {
723 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
724 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up16x4__avx,
725 .cr = 16,
726 .mr = 4,
727 };
728 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
729 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up16x9__avx,
730 .cr = 16,
731 .mr = 9,
732 };
733 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
734 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x25__avx,
735 .cr = 8,
736 .mr = 25,
737 };
738 } else {
739 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
740 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x4__sse,
741 .cr = 8,
742 .mr = 4,
743 };
744 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
745 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__sse,
746 .cr = 8,
747 .mr = 9,
748 };
749 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
750 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x25__sse,
751 .cr = 8,
752 .mr = 25,
753 };
754 }
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700755 xnn_params.f32.avgpool = (struct avgpool_parameters) {
756 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__sse,
757 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__sse,
758 .mr = 9,
759 .qr = 8,
760 };
761 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
762 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__sse,
763 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__sse,
764 .mr = 9,
765 .qr = 8,
766 };
767 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
768 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__sse,
769 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__sse,
770 .mr = 7,
771 };
772 xnn_params.f32.maxpool = (struct maxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800773 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8x__sse_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700774 .mr = 9,
775 .qr = 8,
776 };
777 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800778 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_4x__sse2_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700779 .mr = 4,
780 };
781 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800782 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_9x__sse2_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700783 .mr = 9,
784 };
785 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800786 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700787 .mr = 9,
788 .qr = 8,
789 };
Marat Dukhan69722492019-11-11 19:55:50 -0800790 xnn_params.f32.bilinear = (struct bilinear_parameters) {
791 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__sse_c8,
792 .pixel_tile = 1,
793 .channel_tile = 8,
794 };
Marat Dukhane2c3f292019-11-27 15:40:54 -0800795 if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {
796 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__avx512f;
797 } else if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx()) {
798 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__avx;
799 } else {
800 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__sse;
801 }
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700802 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__sse;
Marat Dukhan7bee7512019-11-18 15:15:48 -0800803 xnn_params.f32.sigmoid = (xnn_univector_ukernel_function) xnn_f32_sigmoid_ukernel__sse2_p5_div_x16;
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700804 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800805 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__sse2_2x8,
806 .row_tile = 2,
807 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700808 };
Marat Dukhanb1a0fc32019-12-02 19:32:02 -0800809 xnn_params.f32.vadd = (struct vbinary_parameters) {
810 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_ukernel__sse_x8,
811 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__sse_x8,
812 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__sse_x8,
813 .element_tile = 8,
814 };
Marat Dukhan1e782c42019-11-21 17:02:40 -0800815 xnn_params.f32.vmul = (struct vbinary_parameters) {
816 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_ukernel__sse_x8,
817 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__sse_x8,
818 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__sse_x8,
Marat Dukhanca2733c2019-11-15 23:21:17 -0800819 .element_tile = 8,
820 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700821 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -0800822 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__sse_2x,
823 .channel_tile = 4,
824 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700825 };
Marat Dukhanefc47b82019-11-18 09:25:38 -0800826 #ifndef XNN_NO_NCHW_OPERATORS
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700827 xnn_params.f32.spmm = (struct spmm_parameters) {
828 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__sse,
829 .mr = 4,
830 .nr = 1,
831 };
832 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
833 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__sse,
834 .input_width_tile = 4,
835 .output_width_tile = 4,
836 .output_height_tile = 1,
837 };
838 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
839 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse,
840 .input_width_tile = 4,
841 .output_width_tile = 4,
842 .output_height_tile = 1,
843 };
844 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
845 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__sse_x4,
846 .channel_tile = 4,
847 };
Marat Dukhanefc47b82019-11-18 09:25:38 -0800848 #endif // XNN_NO_NCHW_OPERATORS
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700849 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700850
851 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700852 #ifndef XNN_NO_X32_OPERATORS
853 xnn_params.x32.pad = (struct pad_parameters) {
854 .ukernel = xnn_x32_pad_x2__sse2,
855 .mr = 2,
856 };
857 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
858 xnn_params.x32.zip = (struct zip_parameters) {
859 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__sse2,
860 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__sse2,
861 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__sse2,
862 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__sse2,
863 };
864 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700865
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700866#elif XNN_ARCH_PNACL || XNN_ARCH_WASMSIMD
Marat Dukhan466b5232019-10-09 11:22:20 -0700867 // Unlike most other architectures, on x86/x86-64 when floating-point instructions
868 // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
869 // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
870 // of two infinities (must produce NaN per IEEE 754 standard).
871 static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
872 const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;
873
XNNPACK Teamb455b122019-09-27 18:10:33 -0700874 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700875 #ifndef XNN_NO_Q8_OPERATORS
876 xnn_params.q8.gemm = (struct gemm_parameters) {
877 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
878 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
879 .mr = 2,
880 .nr = 2,
881 };
882 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
883 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
884 .cr = 1,
885 .mr = 9,
886 };
887 xnn_params.q8.avgpool = (struct avgpool_parameters) {
888 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
889 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
890 .mr = 9,
891 .qr = 8,
892 };
893 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
894 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
895 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
896 .mr = 7,
897 };
898 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
899 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700900
901 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700902 #ifndef XNN_NO_U8_OPERATORS
903 xnn_params.u8.maxpool = (struct maxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800904 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8x__scalar_c1,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700905 .mr = 9,
906 .qr = 8,
907 };
908 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
909 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
910 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
911 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700912
913 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700914 #ifndef XNN_NO_X8_OPERATORS
915 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
916 xnn_params.x8.zip = (struct zip_parameters) {
917 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
918 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
919 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
920 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
921 };
922 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700923
924 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700925 #ifndef XNN_NO_F32_OPERATORS
926 if (is_wasm_x86) {
927 xnn_params.f32.gemm = (struct gemm_parameters) {
Marat Dukhancb801972019-10-23 02:10:33 -0700928 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__psimd_splat,
929 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__psimd_splat,
930 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__psimd_splat,
931 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__psimd_splat,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700932 .mr = 4,
933 .nr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700934 };
935 } else {
936 xnn_params.f32.gemm = (struct gemm_parameters) {
Marat Dukhancd945c62019-10-25 11:59:50 -0700937 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8s4__psimd,
938 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8s4__psimd,
939 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__psimd,
940 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__psimd,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700941 .mr = 6,
942 .nr = 8,
Marat Dukhancd945c62019-10-25 11:59:50 -0700943 .log2_sr = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700944 };
945 }
946 xnn_params.f32.gemm2 = (struct gemm_parameters) {
947 .gemm = NULL,
948 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__psimd,
Marat Dukhan466b5232019-10-09 11:22:20 -0700949 .mr = 4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700950 .nr = 2,
951 .log2_kr = 2,
Marat Dukhan466b5232019-10-09 11:22:20 -0700952 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700953 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800954 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700955 .cr = 4,
956 .mr = 4,
Marat Dukhan466b5232019-10-09 11:22:20 -0700957 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700958 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800959 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__psimd_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700960 .cr = 4,
961 .mr = 9,
962 };
963 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800964 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700965 .cr = 4,
966 .mr = 25,
967 };
968 xnn_params.f32.avgpool = (struct avgpool_parameters) {
969 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__psimd,
970 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__psimd,
971 .mr = 9,
972 .qr = 8,
973 };
974 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
975 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__psimd,
976 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__psimd,
977 .mr = 9,
978 .qr = 8,
979 };
980 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
981 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__psimd,
982 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__psimd,
983 .mr = 7,
984 };
985 xnn_params.f32.maxpool = (struct maxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800986 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700987 .mr = 9,
988 .qr = 8,
989 };
990 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800991 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_4x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700992 .mr = 4,
993 };
994 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800995 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_9x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700996 .mr = 9,
997 };
998 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800999 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001000 .mr = 9,
1001 .qr = 8,
1002 };
Marat Dukhan69722492019-11-11 19:55:50 -08001003 xnn_params.f32.bilinear = (struct bilinear_parameters) {
1004 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__psimd_c8,
1005 .pixel_tile = 1,
1006 .channel_tile = 8,
1007 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001008 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__psimd;
1009 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__psimd;
1010 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001011 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__psimd_2x8,
1012 .row_tile = 2,
1013 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001014 };
Marat Dukhanb1a0fc32019-12-02 19:32:02 -08001015 xnn_params.f32.vadd = (struct vbinary_parameters) {
1016 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_ukernel__psimd_x8,
1017 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__psimd_x8,
1018 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__psimd_x8,
1019 .element_tile = 8,
1020 };
Marat Dukhan1e782c42019-11-21 17:02:40 -08001021 xnn_params.f32.vmul = (struct vbinary_parameters) {
1022 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_ukernel__psimd_x8,
1023 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__psimd_x8,
1024 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__psimd_x8,
Marat Dukhanca2733c2019-11-15 23:21:17 -08001025 .element_tile = 8,
1026 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001027 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -08001028 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__psimd_2x,
1029 .channel_tile = 4,
1030 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001031 };
1032 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001033
1034 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001035 #ifndef XNN_NO_X32_OPERATORS
1036 xnn_params.x32.pad = (struct pad_parameters) {
1037 .ukernel = xnn_x32_pad_x2__psimd,
1038 .mr = 2,
1039 };
1040 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
1041 xnn_params.x32.zip = (struct zip_parameters) {
1042 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__psimd,
1043 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__psimd,
1044 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__psimd,
1045 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__psimd,
1046 };
1047 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001048
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001049#elif XNN_ARCH_WASM || XNN_ARCH_ASMJS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001050 // Unlike most other architectures, on x86/x86-64 when floating-point instructions
1051 // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
1052 // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
1053 // of two infinities (must produce NaN per IEEE 754 standard).
1054 static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
1055 const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;
1056
1057 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001058 #ifndef XNN_NO_Q8_OPERATORS
1059 xnn_params.q8.gemm = (struct gemm_parameters) {
1060 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
1061 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
1062 .mr = 2,
1063 .nr = 2,
1064 };
1065 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
1066 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
1067 .cr = 1,
1068 .mr = 9,
1069 };
1070 xnn_params.q8.avgpool = (struct avgpool_parameters) {
1071 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
1072 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
1073 .mr = 9,
1074 .qr = 8,
1075 };
1076 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
1077 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
1078 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
1079 .mr = 7,
1080 };
1081 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
1082 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001083
1084 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001085 #ifndef XNN_NO_U8_OPERATORS
1086 xnn_params.u8.maxpool = (struct maxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -08001087 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8x__scalar_c1,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001088 .mr = 9,
1089 .qr = 8,
1090 };
1091 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
1092 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
1093 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
1094 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001095
1096 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001097 #ifndef XNN_NO_X8_OPERATORS
1098 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
1099 xnn_params.x8.zip = (struct zip_parameters) {
1100 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
1101 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
1102 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
1103 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
1104 };
1105 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001106
1107 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001108 #ifndef XNN_NO_F32_OPERATORS
1109 if (is_wasm_x86) {
1110 xnn_params.f32.gemm = (struct gemm_parameters) {
1111 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_2x4__scalar,
1112 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_2x4__scalar,
1113 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
1114 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
1115 .mr = 2,
1116 .nr = 4,
1117 };
1118 } else {
1119 xnn_params.f32.gemm = (struct gemm_parameters) {
1120 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x4__scalar,
1121 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x4__scalar,
1122 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
1123 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
1124 .mr = 4,
1125 .nr = 4,
1126 };
1127 }
1128 xnn_params.f32.gemm2 = (struct gemm_parameters) {
1129 .gemm = NULL,
1130 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__scalar,
XNNPACK Teamb455b122019-09-27 18:10:33 -07001131 .mr = 4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001132 .nr = 2,
XNNPACK Teamb455b122019-09-27 18:10:33 -07001133 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001134 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -08001135 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x4__scalar_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001136 .cr = 1,
1137 .mr = 4,
1138 };
1139 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -08001140 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x9__scalar_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001141 .cr = 1,
1142 .mr = 9,
1143 };
1144 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -08001145 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x25__scalar_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001146 .cr = 1,
1147 .mr = 25,
1148 };
1149 xnn_params.f32.avgpool = (struct avgpool_parameters) {
1150 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__scalar,
1151 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__scalar,
1152 .mr = 9,
1153 .qr = 8,
1154 };
1155 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
1156 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__scalar,
1157 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__scalar,
1158 .mr = 9,
1159 .qr = 8,
1160 };
1161 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
1162 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__scalar,
1163 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__scalar,
1164 .mr = 7,
1165 };
1166 xnn_params.f32.maxpool = (struct maxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -08001167 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8x__scalar_c1,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001168 .mr = 9,
1169 .qr = 8,
1170 };
1171 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -08001172 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_4x__scalar_c1,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001173 .mr = 4,
1174 };
1175 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -08001176 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_9x__scalar_c1,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001177 .mr = 9,
1178 };
1179 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -08001180 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001181 .mr = 9,
1182 .qr = 8,
1183 };
Marat Dukhan69722492019-11-11 19:55:50 -08001184 xnn_params.f32.bilinear = (struct bilinear_parameters) {
1185 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__scalar_c2,
1186 .pixel_tile = 1,
1187 .channel_tile = 2,
1188 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001189 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__scalar;
1190 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__scalar;
1191 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001192 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__scalar_2x4,
1193 .row_tile = 4,
1194 .channel_tile = 4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001195 };
Marat Dukhanb1a0fc32019-12-02 19:32:02 -08001196 xnn_params.f32.vadd = (struct vbinary_parameters) {
1197 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_ukernel__scalar_x4,
1198 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__scalar_x4,
1199 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__scalar_x4,
1200 .element_tile = 8,
1201 };
Marat Dukhan1e782c42019-11-21 17:02:40 -08001202 xnn_params.f32.vmul = (struct vbinary_parameters) {
1203 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_ukernel__scalar_x4,
1204 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__scalar_x4,
1205 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__scalar_x4,
Marat Dukhanca2733c2019-11-15 23:21:17 -08001206 .element_tile = 8,
1207 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001208 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -08001209 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c1__scalar_2x,
1210 .channel_tile = 1,
1211 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001212 };
Marat Dukhanefc47b82019-11-18 09:25:38 -08001213 #ifndef XNN_NO_NCHW_OPERATORS
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001214 xnn_params.f32.spmm = (struct spmm_parameters) {
Marat Dukhanbff791e2019-10-24 11:05:37 -07001215 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x1__scalar,
1216 .mr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001217 .nr = 1,
1218 };
Erich Elsenc6afd9b2019-10-24 16:10:53 -07001219 xnn_params.f32.spmm2 = (struct spmm_parameters) {
1220 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x2__scalar,
1221 .mr = 8,
1222 .nr = 2,
1223 };
1224 xnn_params.f32.spmm4 = (struct spmm_parameters) {
1225 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x4__scalar,
1226 .mr = 8,
1227 .nr = 4,
1228 };
Marat Dukhan14fe0b22019-10-23 21:20:07 -07001229 xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
1230 .ukernel_with_symm_padding =
1231 (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1,
1232 .output_channel_tile = 4,
1233 .output_height_tile = 1,
1234 .output_width_tile = 1,
1235 };
1236 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
1237 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar,
1238 .input_width_tile = 1,
1239 .output_width_tile = 1,
1240 .output_height_tile = 1,
1241 };
1242 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
1243 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar,
1244 .input_width_tile = 1,
1245 .output_width_tile = 1,
1246 .output_height_tile = 1,
1247 };
Marat Dukhana99918a2019-11-15 14:40:12 -08001248 xnn_params.f32.spchw_dwconv5x5 = (struct spchw_dwconv_parameters) {
1249 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar,
1250 .input_width_tile = 1,
1251 .output_width_tile = 1,
1252 .output_height_tile = 1,
1253 };
1254 xnn_params.f32.spchw_dwconv5x5s2 = (struct spchw_dwconv_parameters) {
1255 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar,
1256 .input_width_tile = 1,
1257 .output_width_tile = 1,
1258 .output_height_tile = 1,
1259 };
Marat Dukhan14fe0b22019-10-23 21:20:07 -07001260 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
1261 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__scalar_x1,
1262 .channel_tile = 1,
1263 };
Marat Dukhanefc47b82019-11-18 09:25:38 -08001264 #endif // XNN_NO_NCHW_OPERATORS
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001265 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001266
1267 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001268 #ifndef XNN_NO_X32_OPERATORS
1269 xnn_params.x32.pad = (struct pad_parameters) {
1270 .ukernel = xnn_x32_pad_x2__scalar,
1271 .mr = 2,
1272 };
1273 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__scalar;
1274 xnn_params.x32.zip = (struct zip_parameters) {
1275 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__scalar,
1276 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__scalar,
1277 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__scalar,
1278 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__scalar,
1279 };
1280 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001281
1282#else
1283 #error "Unsupported architecture"
1284#endif
1285 xnn_params.initialized = true;
1286}
1287
Marat Dukhan04f03be2019-11-19 12:36:47 -08001288enum xnn_status xnn_initialize(const struct xnn_allocator* allocator) {
Marat Dukhand343c222019-10-07 09:22:14 -07001289 #ifndef __EMSCRIPTEN__
1290 if (!cpuinfo_initialize()) {
1291 return xnn_status_out_of_memory;
1292 }
1293 #endif
XNNPACK Teamb455b122019-09-27 18:10:33 -07001294 pthread_once(&init_guard, &init);
1295 if (xnn_params.initialized) {
Marat Dukhan04f03be2019-11-19 12:36:47 -08001296 if (allocator != NULL) {
1297 memcpy(&xnn_params.allocator, allocator, sizeof(struct xnn_allocator));
1298 } else {
1299 xnn_params.allocator.allocate = &xnn_allocate;
1300 xnn_params.allocator.reallocate = &xnn_reallocate;
1301 xnn_params.allocator.deallocate = &xnn_deallocate;
1302 xnn_params.allocator.aligned_allocate = &xnn_aligned_allocate;
1303 xnn_params.allocator.aligned_deallocate = &xnn_aligned_deallocate;
1304 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07001305 return xnn_status_success;
1306 } else {
1307 return xnn_status_unsupported_hardware;
1308 }
1309}
1310
1311enum xnn_status xnn_deinitialize(void) {
Marat Dukhand343c222019-10-07 09:22:14 -07001312 #ifndef __EMSCRIPTEN__
1313 cpuinfo_deinitialize();
1314 #endif
XNNPACK Teamb455b122019-09-27 18:10:33 -07001315 return xnn_status_success;
1316}