blob: e189b8bd5d6931343f233c26ab19bed4ae7e5c76 [file] [log] [blame]
XNNPACK Teamb455b122019-09-27 18:10:33 -07001// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2019 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8
9#include <stdbool.h>
10#include <stddef.h>
11#include <stdint.h>
Marat Dukhan04f03be2019-11-19 12:36:47 -080012#include <string.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070013
14#include <pthread.h>
15
Marat Dukhand343c222019-10-07 09:22:14 -070016#ifndef __EMSCRIPTEN__
17 #include <cpuinfo.h>
18#endif
XNNPACK Teamb455b122019-09-27 18:10:33 -070019
20#include <xnnpack.h>
21#include <xnnpack/argmaxpool.h>
22#include <xnnpack/avgpool.h>
Marat Dukhan69722492019-11-11 19:55:50 -080023#include <xnnpack/bilinear.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070024#include <xnnpack/clamp.h>
Marat Dukhan1dadbf72019-10-01 10:46:20 -070025#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070026#include <xnnpack/conv.h>
27#include <xnnpack/dwconv.h>
28#include <xnnpack/gavgpool.h>
29#include <xnnpack/gemm.h>
30#include <xnnpack/hswish.h>
31#include <xnnpack/igemm.h>
32#include <xnnpack/log.h>
33#include <xnnpack/lut.h>
34#include <xnnpack/maxpool.h>
Marat Dukhan04f03be2019-11-19 12:36:47 -080035#include <xnnpack/memory.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070036#include <xnnpack/pad.h>
37#include <xnnpack/params.h>
38#include <xnnpack/pavgpool.h>
39#include <xnnpack/prelu.h>
40#include <xnnpack/rmax.h>
41#include <xnnpack/spmm.h>
42#include <xnnpack/unpool.h>
43#include <xnnpack/vadd.h>
Marat Dukhan1e782c42019-11-21 17:02:40 -080044#include <xnnpack/vbinary.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070045#include <xnnpack/vmulcaddc.h>
Marat Dukhan1e782c42019-11-21 17:02:40 -080046#include <xnnpack/vunary.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070047#include <xnnpack/zip.h>
48
// Build-time switch, defaulting to 1 unless the build system already defined it.
// init() tests it with `#if XNN_ENABLE_ASSEMBLY` to choose between the
// `__aarch32_*` / `__aarch64_*` micro-kernel variants and the plain
// `__neon*` variants (presumably hand-written assembly vs. intrinsics
// implementations; the kernels themselves are not visible here).
49#ifndef XNN_ENABLE_ASSEMBLY
50 #define XNN_ENABLE_ASSEMBLY 1
51#endif
52
// One-time-initialization control object in its static initial state.
// NOTE(review): presumably passed to pthread_once() together with init() so the
// parameter table is filled exactly once; the call site is not visible in this
// part of the file, so confirm there.
53static pthread_once_t init_guard = PTHREAD_ONCE_INIT;
54
// Global micro-kernel parameter table. It starts out marked as not initialized;
// init() fills the per-datatype sections (q8, u8, x8, f32, x32) with the
// kernels chosen for the target architecture.
// init() returns early, leaving those sections unpopulated, when the required
// ISA is missing (NEON on 32-bit ARM, SSE2 on x86).
// NOTE(review): where `.initialized` is flipped to true is not visible in this
// part of the file.
55struct xnn_parameters xnn_params = {
56 .initialized = false
57};
58
// External stub routines declared only for the web-oriented targets. Both take
// and return raw 32-bit words.
// NOTE(review): the names suggest f32 subtract / minimum operating on float bit
// patterns, probably used to probe runtime floating-point behaviour; their
// definitions and call sites are not visible here, so confirm before relying on it.
//
// The subtract stub is declared for PNaCl, asm.js, WAsm and WAsm SIMD builds.
Marat Dukhan1dadbf72019-10-01 10:46:20 -070059#if XNN_ARCH_PNACL || XNN_ARCH_ASMJS || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
XNNPACK Teamb455b122019-09-27 18:10:33 -070060 extern uint32_t xnn_stub_wasm_f32_sub(uint32_t a, uint32_t b);
61#endif
// The minimum stub has the same guard except that asm.js builds are excluded.
Marat Dukhan1dadbf72019-10-01 10:46:20 -070062#if XNN_ARCH_PNACL || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
XNNPACK Teamb455b122019-09-27 18:10:33 -070063 extern uint32_t xnn_stub_wasm_f32_min(uint32_t a, uint32_t b);
64#endif
65
66static void init(void) {
Marat Dukhan1dadbf72019-10-01 10:46:20 -070067#if XNN_ARCH_ARM
XNNPACK Teamb455b122019-09-27 18:10:33 -070068 if (!cpuinfo_has_arm_neon()) {
69 xnn_log_error("XNNPACK initialization failed: NEON is not supported");
70 return;
71 }
72
73 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -070074 #ifndef XNN_NO_Q8_OPERATORS
75 xnn_params.q8.gemm = (struct gemm_parameters) {
76 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x8__neon,
77 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x8__neon,
78 .mr = 4,
79 .nr = 8,
80 };
XNNPACK Teamb455b122019-09-27 18:10:33 -070081
Marat Dukhan8fe54e42019-10-10 14:12:59 -070082 #if XNN_ENABLE_ASSEMBLY
83 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
84 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__aarch32_neon,
85 .cr = 8,
86 .mr = 9,
87 };
88 #else
89 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
90 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
91 .cr = 8,
92 .mr = 9,
93 };
94 #endif
95 xnn_params.q8.avgpool = (struct avgpool_parameters) {
96 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
97 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
98 .mr = 9,
99 .qr = 8,
100 };
101 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
102 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
103 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
104 .mr = 7,
105 };
106 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
107 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700108
109 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700110 #ifndef XNN_NO_U8_OPERATORS
111 xnn_params.u8.maxpool = (struct maxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800112 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8x__neon_c16,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700113 .mr = 9,
114 .qr = 8,
115 };
116 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
117 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
118 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
119 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700120
121 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700122 #ifndef XNN_NO_X8_OPERATORS
123 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
124 xnn_params.x8.zip = (struct zip_parameters) {
125 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
126 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
127 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
128 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
129 };
130 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700131
132 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700133 #ifndef XNN_NO_F32_OPERATORS
Frank Barchard32670922019-11-30 21:58:51 -0800134 #if XNN_ENABLE_ASSEMBLY
135 xnn_params.f32.gemm = (struct gemm_parameters) {
136 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch32_neon_ld64,
137 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neon_lane_ld128,
138 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_lane_ld64,
139 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_lane_ld64,
140 .mr = 4,
141 .nr = 8,
142 };
143 #else // XNN_ENABLE_ASSEMBLY
144 xnn_params.f32.gemm = (struct gemm_parameters) {
145 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neon_lane_ld128,
146 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neon_lane_ld128,
147 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_lane_ld64,
148 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_lane_ld64,
149 .mr = 4,
150 .nr = 8,
151 };
152 #endif // XNN_ENABLE_ASSEMBLY
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700153 xnn_params.f32.gemm2 = (struct gemm_parameters) {
154 .gemm = NULL,
Frank Barchard91317c52019-11-22 10:54:35 -0800155 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neon_lane_ld64,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700156 .mr = 4,
157 .nr = 2,
158 };
159 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
160 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
161 .cr = 4,
162 .mr = 4,
163 };
164 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
165 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neon,
166 .cr = 4,
167 .mr = 9,
168 };
169 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
170 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
171 .cr = 4,
172 .mr = 25,
173 };
174 xnn_params.f32.avgpool = (struct avgpool_parameters) {
175 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
176 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
177 .mr = 9,
178 .qr = 8,
179 };
180 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
181 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
182 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
183 .mr = 9,
184 .qr = 8,
185 };
186 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
187 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
188 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
189 .mr = 7,
190 };
191 xnn_params.f32.maxpool = (struct maxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800192 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700193 .mr = 9,
194 .qr = 8,
195 };
196 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800197 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_4x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700198 .mr = 4,
199 };
200 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800201 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_9x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700202 .mr = 9,
203 };
204 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800205 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700206 .mr = 9,
207 .qr = 8,
208 };
Marat Dukhan69722492019-11-11 19:55:50 -0800209 xnn_params.f32.bilinear = (struct bilinear_parameters) {
210 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__neon_c8,
211 .pixel_tile = 1,
212 .channel_tile = 8,
213 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700214 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
215 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neon;
216 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800217 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__neon_2x8,
218 .row_tile = 2,
219 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700220 };
Marat Dukhanb1a0fc32019-12-02 19:32:02 -0800221 xnn_params.f32.vadd = (struct vbinary_parameters) {
222 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_ukernel__neon_x8,
223 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__neon_x8,
224 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__neon_x8,
225 .element_tile = 8,
226 };
Marat Dukhan1e782c42019-11-21 17:02:40 -0800227 xnn_params.f32.vmul = (struct vbinary_parameters) {
228 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_ukernel__neon_x8,
229 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
230 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
Marat Dukhanca2733c2019-11-15 23:21:17 -0800231 .element_tile = 8,
232 };
Marat Dukhan05f3f6d2019-12-03 15:13:53 -0800233 xnn_params.f32.vsub = (struct vbinary_parameters) {
234 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_ukernel__neon_x8,
235 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_ukernel__neon_x8,
236 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_ukernel__neon_x8,
237 .element_tile = 8,
238 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700239 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -0800240 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neon_2x,
241 .channel_tile = 4,
242 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700243 };
244 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700245
246 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700247 #ifndef XNN_NO_X32_OPERATORS
248 xnn_params.x32.pad = (struct pad_parameters) {
249 .ukernel = xnn_x32_pad_x2__neon,
250 .mr = 2,
251 };
252 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
253 xnn_params.x32.zip = (struct zip_parameters) {
254 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
255 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
256 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
257 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
258 };
259 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700260
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700261#elif XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700262
263 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700264 #ifndef XNN_NO_Q8_OPERATORS
265 xnn_params.q8.gemm = (struct gemm_parameters) {
266 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_8x8__neon,
267 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_8x8__neon,
268 .mr = 8,
269 .nr = 8,
270 };
271 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
272 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
273 .cr = 8,
274 .mr = 9,
275 };
276 xnn_params.q8.avgpool = (struct avgpool_parameters) {
277 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
278 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
279 .mr = 9,
280 .qr = 8,
281 };
282 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
283 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
284 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
285 .mr = 7,
286 };
287 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
288 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700289
290 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700291 #ifndef XNN_NO_U8_OPERATORS
292 xnn_params.u8.maxpool = (struct maxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800293 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8x__neon_c16,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700294 .mr = 9,
295 .qr = 8,
296 };
297 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
298 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
299 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
300 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700301
302 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700303 #ifndef XNN_NO_X8_OPERATORS
304 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
305 xnn_params.x8.zip = (struct zip_parameters) {
306 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
307 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
308 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
309 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
310 };
311 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700312
313 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700314 #ifndef XNN_NO_F32_OPERATORS
315 #if XNN_ENABLE_ASSEMBLY
316 switch (cpuinfo_get_core(0)->uarch) {
317 case cpuinfo_uarch_kryo:
318 xnn_params.f32.gemm = (struct gemm_parameters) {
319 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a57,
320 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
321 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
322 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
323 .mr = 4,
324 .nr = 8,
325 };
326 break;
327 case cpuinfo_uarch_cortex_a57:
328 xnn_params.f32.gemm = (struct gemm_parameters) {
329 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
330 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
331 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
332 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
333 .mr = 6,
334 .nr = 8,
335 };
336 break;
337 case cpuinfo_uarch_cortex_a72:
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700338 xnn_params.f32.gemm = (struct gemm_parameters) {
339 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
340 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
341 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
342 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
343 .mr = 4,
344 .nr = 8,
345 };
346 break;
347 case cpuinfo_uarch_cortex_a75:
Frank Barchard263bb092019-10-28 15:28:46 -0700348 case cpuinfo_uarch_cortex_a76:
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700349 case cpuinfo_uarch_meerkat_m3:
350 case (cpuinfo_uarch_meerkat_m3 + 1):
351 xnn_params.f32.gemm = (struct gemm_parameters) {
352 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
353 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
354 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
355 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
356 .mr = 6,
357 .nr = 8,
358 };
359 break;
Frank Barcharddf06d802019-11-20 15:53:46 -0800360
361 case cpuinfo_uarch_mongoose_m1:
362 case cpuinfo_uarch_mongoose_m2:
363 xnn_params.f32.gemm = (struct gemm_parameters) {
364 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8s4__neonfma,
365 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8s4__neonfma,
366 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8s4__neonfma,
367 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__neonfma,
368 .mr = 6,
369 .nr = 8,
370 .log2_sr = 2,
371 };
372 break;
373
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700374 case cpuinfo_uarch_cortex_a53:
375 case cpuinfo_uarch_cortex_a55:
376 xnn_params.f32.gemm = (struct gemm_parameters) {
Frank Barchardbd1d5d92019-10-30 15:53:30 -0700377 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a53,
378 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a53,
379 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a53,
380 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a53,
381 .mr = 6,
382 .nr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700383 };
384 break;
385 case cpuinfo_uarch_cortex_a73:
386 xnn_params.f32.gemm = (struct gemm_parameters) {
387 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
388 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
389 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
390 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
391 .mr = 6,
392 .nr = 8,
393 };
394 break;
395 default:
396 xnn_params.f32.gemm = (struct gemm_parameters) {
Frank Barchard91317c52019-11-22 10:54:35 -0800397 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__neonfma_lane_ld64,
398 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__neonfma_lane_ld64,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700399 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
400 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
Frank Barchard2af471b2019-10-16 19:10:32 -0700401 .mr = 6,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700402 .nr = 8,
403 };
404 break;
405 }
406 #else // XNN_ENABLE_ASSEMBLY
XNNPACK Teamb455b122019-09-27 18:10:33 -0700407 xnn_params.f32.gemm = (struct gemm_parameters) {
Frank Barchard91317c52019-11-22 10:54:35 -0800408 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__neonfma_lane_ld64,
409 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__neonfma_lane_ld64,
410 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neonfma_lane_ld64,
411 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neonfma_lane_ld64,
Frank Barchard2af471b2019-10-16 19:10:32 -0700412 .mr = 6,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700413 .nr = 8,
414 };
Frank Barchard32670922019-11-30 21:58:51 -0800415 #endif // XNN_ENABLE_ASSEMBLY
XNNPACK Teamb455b122019-09-27 18:10:33 -0700416
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700417 xnn_params.f32.gemm2 = (struct gemm_parameters) {
418 .gemm = NULL,
Frank Barchard91317c52019-11-22 10:54:35 -0800419 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700420 .mr = 4,
421 .nr = 2,
422 };
423 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
424 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
425 .cr = 4,
426 .mr = 4,
427 };
428 switch (cpuinfo_get_core(0)->uarch) {
429 case cpuinfo_uarch_kryo:
430 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
431 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neonfma,
432 .cr = 4,
433 .mr = 9,
434 };
435 break;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700436#if XNN_ENABLE_ASSEMBLY
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700437 case cpuinfo_uarch_cortex_a53:
438 case cpuinfo_uarch_cortex_a55:
439 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
440 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55,
441 .cr = 4,
442 .mr = 9,
443 };
444 break;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700445#endif
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700446 default:
447 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
448 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__neonfma,
449 .cr = 8,
450 .mr = 9,
451 };
452 break;
453 }
454 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
455 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
456 .cr = 4,
457 .mr = 25,
458 };
459 xnn_params.f32.avgpool = (struct avgpool_parameters) {
460 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
461 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
462 .mr = 9,
463 .qr = 8,
464 };
465 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
466 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
467 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
468 .mr = 9,
469 .qr = 8,
470 };
471 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
472 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
473 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
474 .mr = 7,
475 };
476 xnn_params.f32.maxpool = (struct maxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800477 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700478 .mr = 9,
479 .qr = 8,
480 };
481 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800482 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_4x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700483 .mr = 4,
484 };
485 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800486 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_9x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700487 .mr = 9,
488 };
489 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800490 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700491 .mr = 9,
492 .qr = 8,
493 };
Marat Dukhan69722492019-11-11 19:55:50 -0800494 xnn_params.f32.bilinear = (struct bilinear_parameters) {
495 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__neonfma_c8,
496 .pixel_tile = 1,
497 .channel_tile = 8,
498 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700499 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
500 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neonfma;
Marat Dukhan14bec502019-11-18 11:35:31 -0800501 xnn_params.f32.sigmoid = (xnn_univector_ukernel_function) xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16;
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700502 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800503 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__neon_2x8,
504 .row_tile = 2,
505 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700506 };
Marat Dukhanb1a0fc32019-12-02 19:32:02 -0800507 xnn_params.f32.vadd = (struct vbinary_parameters) {
508 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_ukernel__neon_x8,
509 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__neon_x8,
510 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__neon_x8,
511 .element_tile = 8,
512 };
Marat Dukhan1e782c42019-11-21 17:02:40 -0800513 xnn_params.f32.vmul = (struct vbinary_parameters) {
514 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_ukernel__neon_x8,
515 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
516 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
Marat Dukhanca2733c2019-11-15 23:21:17 -0800517 .element_tile = 8,
518 };
Marat Dukhan05f3f6d2019-12-03 15:13:53 -0800519 xnn_params.f32.vsub = (struct vbinary_parameters) {
520 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_ukernel__neon_x8,
521 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_ukernel__neon_x8,
522 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_ukernel__neon_x8,
523 .element_tile = 8,
524 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700525 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -0800526 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neonfma_2x,
527 .channel_tile = 4,
528 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700529 };
Marat Dukhanefc47b82019-11-18 09:25:38 -0800530 #ifndef XNN_NO_NCHW_OPERATORS
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700531 xnn_params.f32.spmm = (struct spmm_parameters) {
Erich Elsen9cdade32019-10-16 05:26:59 -0700532 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x1__neonfma_pipelined,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700533 .mr = 16,
534 .nr = 1,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700535 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700536 xnn_params.f32.spmm2 = (struct spmm_parameters) {
537 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x2__neonfma,
538 .mr = 16,
539 .nr = 2,
540 };
541 xnn_params.f32.spmm4 = (struct spmm_parameters) {
542 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x4__neonfma,
543 .mr = 16,
544 .nr = 4,
545 };
546 xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
547 .ukernel_with_symm_padding =
548 (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2,
549 .output_channel_tile = 4,
550 .output_height_tile = 2,
551 .output_width_tile = 2,
552 };
553 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
554 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma,
555 .input_width_tile = 4,
556 .output_width_tile = 4,
557 .output_height_tile = 3,
558 };
559 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
560 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma,
561 .input_width_tile = 4,
562 .output_width_tile = 4,
563 .output_height_tile = 1,
564 };
Marat Dukhana99918a2019-11-15 14:40:12 -0800565 xnn_params.f32.spchw_dwconv5x5 = (struct spchw_dwconv_parameters) {
566 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma,
567 .input_width_tile = 4,
568 .output_width_tile = 4,
Erich Elsen4ad51152019-11-19 13:11:53 -0800569 .output_height_tile = 3,
Marat Dukhana99918a2019-11-15 14:40:12 -0800570 };
571 xnn_params.f32.spchw_dwconv5x5s2 = (struct spchw_dwconv_parameters) {
572 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma,
573 .input_width_tile = 4,
574 .output_width_tile = 4,
575 .output_height_tile = 1,
576 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700577 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
578 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__neon_x4,
579 .channel_tile = 4,
580 };
Marat Dukhanefc47b82019-11-18 09:25:38 -0800581 #endif // XNN_NO_NCHW_OPERATORS
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700582 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700583
584 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700585 #ifndef XNN_NO_X32_OPERATORS
586 xnn_params.x32.pad = (struct pad_parameters) {
587 .ukernel = xnn_x32_pad_x2__neon,
588 .mr = 2,
589 };
590 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
591 xnn_params.x32.zip = (struct zip_parameters) {
592 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
593 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
594 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
595 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
596 };
597 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700598
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700599#elif XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700600 if (!cpuinfo_has_x86_sse2()) {
601 xnn_log_error("XNNPACK initialization failed: SSE2 is not supported");
602 return;
603 }
604
605 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700606 #ifndef XNN_NO_Q8_OPERATORS
607 xnn_params.q8.gemm = (struct gemm_parameters) {
608 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x4c2__sse2,
609 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x4c2__sse2,
610 .mr = 4,
611 .nr = 4,
612 .log2_kr = 1,
613 };
614 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
615 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__sse2,
616 .cr = 8,
617 .mr = 9,
618 };
619 xnn_params.q8.avgpool = (struct avgpool_parameters) {
620 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__sse2,
621 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__sse2,
622 .mr = 9,
623 .qr = 8,
624 };
625 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
626 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__sse2,
627 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__sse2,
628 .mr = 7,
629 };
630 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__sse2;
631 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700632
633 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700634 #ifndef XNN_NO_U8_OPERATORS
635 xnn_params.u8.maxpool = (struct maxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800636 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8x__sse2_c16,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700637 .mr = 9,
638 .qr = 8,
639 };
640 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__sse2;
641 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
642 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__sse2;
643 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700644
645 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700646 #ifndef XNN_NO_X8_OPERATORS
647 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
648 xnn_params.x8.zip = (struct zip_parameters) {
649 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__sse2,
650 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__sse2,
651 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__sse2,
652 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__sse2,
653 };
654 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700655
656 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700657 #ifndef XNN_NO_F32_OPERATORS
Marat Dukhan0f349c42019-11-27 11:58:54 -0800658 if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {
659 xnn_params.f32.gemm = (struct gemm_parameters) {
660 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_7x16__avx512f_broadcast,
661 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_7x16__avx512f_broadcast,
662 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x16__avx512f_broadcast,
663 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x16__avx512f_broadcast,
664 .mr = 7,
665 .nr = 16,
666 };
667 } else if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_fma3()) {
Marat Dukhan1025ea32019-11-21 16:01:08 -0800668 xnn_params.f32.gemm = (struct gemm_parameters) {
669 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_7x8__fma3_broadcast,
670 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_7x8__fma3_broadcast,
671 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__fma3_broadcast,
672 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__fma3_broadcast,
673 .mr = 7,
674 .nr = 8,
675 };
676 } else if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx()) {
677 xnn_params.f32.gemm = (struct gemm_parameters) {
678 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_7x8__avx_broadcast,
679 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_7x8__avx_broadcast,
680 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__avx_broadcast,
681 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__avx_broadcast,
682 .mr = 7,
683 .nr = 8,
684 };
685 } else {
686 xnn_params.f32.gemm = (struct gemm_parameters) {
687 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__sse_load1,
688 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__sse_load1,
689 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__sse_load1,
690 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__sse_load1,
691 .mr = 4,
692 .nr = 8,
693 };
694 }
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700695 xnn_params.f32.gemm2 = (struct gemm_parameters) {
696 .gemm = NULL,
697 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__sse,
698 .mr = 4,
699 .nr = 2,
700 .log2_kr = 2,
701 };
Marat Dukhan479f87e2019-11-27 15:17:06 -0800702 if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {
703 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
704 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up16x4__avx512f,
705 .cr = 16,
706 .mr = 4,
707 };
708 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
709 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up16x9__avx512f,
710 .cr = 16,
711 .mr = 9,
712 };
713 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
714 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up16x25__avx512f,
715 .cr = 16,
716 .mr = 25,
717 };
718 } else if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_fma3()) {
Marat Dukhan17ec5f32019-11-22 13:34:16 -0800719 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
720 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up16x4__fma3,
721 .cr = 16,
722 .mr = 4,
723 };
724 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
725 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up16x9__fma3,
726 .cr = 16,
727 .mr = 9,
728 };
729 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
730 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x25__fma3,
731 .cr = 8,
732 .mr = 25,
733 };
734 } else if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx()) {
735 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
736 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up16x4__avx,
737 .cr = 16,
738 .mr = 4,
739 };
740 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
741 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up16x9__avx,
742 .cr = 16,
743 .mr = 9,
744 };
745 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
746 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x25__avx,
747 .cr = 8,
748 .mr = 25,
749 };
750 } else {
751 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
752 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x4__sse,
753 .cr = 8,
754 .mr = 4,
755 };
756 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
757 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__sse,
758 .cr = 8,
759 .mr = 9,
760 };
761 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
762 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x25__sse,
763 .cr = 8,
764 .mr = 25,
765 };
766 }
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700767 xnn_params.f32.avgpool = (struct avgpool_parameters) {
768 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__sse,
769 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__sse,
770 .mr = 9,
771 .qr = 8,
772 };
773 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
774 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__sse,
775 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__sse,
776 .mr = 9,
777 .qr = 8,
778 };
779 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
780 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__sse,
781 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__sse,
782 .mr = 7,
783 };
784 xnn_params.f32.maxpool = (struct maxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800785 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8x__sse_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700786 .mr = 9,
787 .qr = 8,
788 };
789 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800790 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_4x__sse2_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700791 .mr = 4,
792 };
793 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800794 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_9x__sse2_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700795 .mr = 9,
796 };
797 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800798 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700799 .mr = 9,
800 .qr = 8,
801 };
Marat Dukhan69722492019-11-11 19:55:50 -0800802 xnn_params.f32.bilinear = (struct bilinear_parameters) {
803 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__sse_c8,
804 .pixel_tile = 1,
805 .channel_tile = 8,
806 };
Marat Dukhane2c3f292019-11-27 15:40:54 -0800807 if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {
808 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__avx512f;
809 } else if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx()) {
810 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__avx;
811 } else {
812 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__sse;
813 }
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700814 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__sse;
Marat Dukhan7bee7512019-11-18 15:15:48 -0800815 xnn_params.f32.sigmoid = (xnn_univector_ukernel_function) xnn_f32_sigmoid_ukernel__sse2_p5_div_x16;
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700816 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800817 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__sse2_2x8,
818 .row_tile = 2,
819 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700820 };
Marat Dukhanb1a0fc32019-12-02 19:32:02 -0800821 xnn_params.f32.vadd = (struct vbinary_parameters) {
822 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_ukernel__sse_x8,
823 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__sse_x8,
824 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__sse_x8,
825 .element_tile = 8,
826 };
Marat Dukhan1e782c42019-11-21 17:02:40 -0800827 xnn_params.f32.vmul = (struct vbinary_parameters) {
828 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_ukernel__sse_x8,
829 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__sse_x8,
830 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__sse_x8,
Marat Dukhanca2733c2019-11-15 23:21:17 -0800831 .element_tile = 8,
832 };
Marat Dukhan05f3f6d2019-12-03 15:13:53 -0800833 xnn_params.f32.vsub = (struct vbinary_parameters) {
834 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_ukernel__sse_x8,
835 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_ukernel__sse_x8,
836 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_ukernel__sse_x8,
837 .element_tile = 8,
838 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700839 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -0800840 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__sse_2x,
841 .channel_tile = 4,
842 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700843 };
Marat Dukhanefc47b82019-11-18 09:25:38 -0800844 #ifndef XNN_NO_NCHW_OPERATORS
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700845 xnn_params.f32.spmm = (struct spmm_parameters) {
846 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__sse,
847 .mr = 4,
848 .nr = 1,
849 };
850 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
851 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__sse,
852 .input_width_tile = 4,
853 .output_width_tile = 4,
854 .output_height_tile = 1,
855 };
856 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
857 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse,
858 .input_width_tile = 4,
859 .output_width_tile = 4,
860 .output_height_tile = 1,
861 };
862 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
863 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__sse_x4,
864 .channel_tile = 4,
865 };
Marat Dukhanefc47b82019-11-18 09:25:38 -0800866 #endif // XNN_NO_NCHW_OPERATORS
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700867 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700868
869 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700870 #ifndef XNN_NO_X32_OPERATORS
871 xnn_params.x32.pad = (struct pad_parameters) {
872 .ukernel = xnn_x32_pad_x2__sse2,
873 .mr = 2,
874 };
875 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
876 xnn_params.x32.zip = (struct zip_parameters) {
877 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__sse2,
878 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__sse2,
879 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__sse2,
880 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__sse2,
881 };
882 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700883
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700884#elif XNN_ARCH_PNACL || XNN_ARCH_WASMSIMD
Marat Dukhan466b5232019-10-09 11:22:20 -0700885 // Unlike most other architectures, on x86/x86-64 when floating-point instructions
886 // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
887 // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
888 // of two infinities (must produce NaN per IEEE 754 standard).
889 static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
890 const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;
891
XNNPACK Teamb455b122019-09-27 18:10:33 -0700892 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700893 #ifndef XNN_NO_Q8_OPERATORS
894 xnn_params.q8.gemm = (struct gemm_parameters) {
895 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
896 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
897 .mr = 2,
898 .nr = 2,
899 };
900 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
901 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
902 .cr = 1,
903 .mr = 9,
904 };
905 xnn_params.q8.avgpool = (struct avgpool_parameters) {
906 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
907 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
908 .mr = 9,
909 .qr = 8,
910 };
911 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
912 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
913 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
914 .mr = 7,
915 };
916 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
917 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700918
919 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700920 #ifndef XNN_NO_U8_OPERATORS
921 xnn_params.u8.maxpool = (struct maxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800922 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8x__scalar_c1,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700923 .mr = 9,
924 .qr = 8,
925 };
926 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
927 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
928 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
929 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700930
931 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700932 #ifndef XNN_NO_X8_OPERATORS
933 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
934 xnn_params.x8.zip = (struct zip_parameters) {
935 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
936 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
937 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
938 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
939 };
940 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700941
942 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700943 #ifndef XNN_NO_F32_OPERATORS
944 if (is_wasm_x86) {
945 xnn_params.f32.gemm = (struct gemm_parameters) {
Marat Dukhancb801972019-10-23 02:10:33 -0700946 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__psimd_splat,
947 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__psimd_splat,
948 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__psimd_splat,
949 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__psimd_splat,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700950 .mr = 4,
951 .nr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700952 };
953 } else {
954 xnn_params.f32.gemm = (struct gemm_parameters) {
Marat Dukhancd945c62019-10-25 11:59:50 -0700955 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8s4__psimd,
956 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8s4__psimd,
957 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__psimd,
958 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__psimd,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700959 .mr = 6,
960 .nr = 8,
Marat Dukhancd945c62019-10-25 11:59:50 -0700961 .log2_sr = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700962 };
963 }
964 xnn_params.f32.gemm2 = (struct gemm_parameters) {
965 .gemm = NULL,
966 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__psimd,
Marat Dukhan466b5232019-10-09 11:22:20 -0700967 .mr = 4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700968 .nr = 2,
969 .log2_kr = 2,
Marat Dukhan466b5232019-10-09 11:22:20 -0700970 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700971 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800972 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700973 .cr = 4,
974 .mr = 4,
Marat Dukhan466b5232019-10-09 11:22:20 -0700975 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700976 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800977 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__psimd_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700978 .cr = 4,
979 .mr = 9,
980 };
981 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800982 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700983 .cr = 4,
984 .mr = 25,
985 };
986 xnn_params.f32.avgpool = (struct avgpool_parameters) {
987 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__psimd,
988 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__psimd,
989 .mr = 9,
990 .qr = 8,
991 };
992 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
993 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__psimd,
994 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__psimd,
995 .mr = 9,
996 .qr = 8,
997 };
998 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
999 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__psimd,
1000 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__psimd,
1001 .mr = 7,
1002 };
1003 xnn_params.f32.maxpool = (struct maxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -08001004 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001005 .mr = 9,
1006 .qr = 8,
1007 };
1008 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -08001009 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_4x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001010 .mr = 4,
1011 };
1012 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -08001013 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_9x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001014 .mr = 9,
1015 };
1016 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -08001017 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001018 .mr = 9,
1019 .qr = 8,
1020 };
Marat Dukhan69722492019-11-11 19:55:50 -08001021 xnn_params.f32.bilinear = (struct bilinear_parameters) {
1022 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__psimd_c8,
1023 .pixel_tile = 1,
1024 .channel_tile = 8,
1025 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001026 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__psimd;
1027 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__psimd;
1028 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001029 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__psimd_2x8,
1030 .row_tile = 2,
1031 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001032 };
Marat Dukhanb1a0fc32019-12-02 19:32:02 -08001033 xnn_params.f32.vadd = (struct vbinary_parameters) {
1034 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_ukernel__psimd_x8,
1035 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__psimd_x8,
1036 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__psimd_x8,
1037 .element_tile = 8,
1038 };
Marat Dukhan1e782c42019-11-21 17:02:40 -08001039 xnn_params.f32.vmul = (struct vbinary_parameters) {
1040 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_ukernel__psimd_x8,
1041 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__psimd_x8,
1042 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__psimd_x8,
Marat Dukhanca2733c2019-11-15 23:21:17 -08001043 .element_tile = 8,
1044 };
Marat Dukhan05f3f6d2019-12-03 15:13:53 -08001045 xnn_params.f32.vsub = (struct vbinary_parameters) {
1046 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_ukernel__psimd_x8,
1047 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_ukernel__psimd_x8,
1048 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_ukernel__psimd_x8,
1049 .element_tile = 8,
1050 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001051 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -08001052 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__psimd_2x,
1053 .channel_tile = 4,
1054 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001055 };
1056 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001057
1058 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001059 #ifndef XNN_NO_X32_OPERATORS
1060 xnn_params.x32.pad = (struct pad_parameters) {
1061 .ukernel = xnn_x32_pad_x2__psimd,
1062 .mr = 2,
1063 };
1064 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
1065 xnn_params.x32.zip = (struct zip_parameters) {
1066 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__psimd,
1067 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__psimd,
1068 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__psimd,
1069 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__psimd,
1070 };
1071 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001072
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001073#elif XNN_ARCH_WASM || XNN_ARCH_ASMJS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001074 // Unlike most other architectures, on x86/x86-64 when floating-point instructions
1075 // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
1076 // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
1077 // of two infinities (must produce NaN per IEEE 754 standard).
1078 static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
1079 const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;
1080
1081 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001082 #ifndef XNN_NO_Q8_OPERATORS
1083 xnn_params.q8.gemm = (struct gemm_parameters) {
1084 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
1085 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
1086 .mr = 2,
1087 .nr = 2,
1088 };
1089 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
1090 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
1091 .cr = 1,
1092 .mr = 9,
1093 };
1094 xnn_params.q8.avgpool = (struct avgpool_parameters) {
1095 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
1096 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
1097 .mr = 9,
1098 .qr = 8,
1099 };
1100 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
1101 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
1102 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
1103 .mr = 7,
1104 };
1105 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
1106 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001107
1108 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001109 #ifndef XNN_NO_U8_OPERATORS
1110 xnn_params.u8.maxpool = (struct maxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -08001111 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8x__scalar_c1,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001112 .mr = 9,
1113 .qr = 8,
1114 };
1115 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
1116 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
1117 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
1118 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001119
1120 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001121 #ifndef XNN_NO_X8_OPERATORS
1122 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
1123 xnn_params.x8.zip = (struct zip_parameters) {
1124 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
1125 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
1126 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
1127 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
1128 };
1129 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001130
1131 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001132 #ifndef XNN_NO_F32_OPERATORS
1133 if (is_wasm_x86) {
1134 xnn_params.f32.gemm = (struct gemm_parameters) {
1135 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_2x4__scalar,
1136 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_2x4__scalar,
Marat Dukhan436ebe62019-12-04 15:10:12 -08001137 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__wasm,
1138 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__wasm,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001139 .mr = 2,
1140 .nr = 4,
1141 };
1142 } else {
1143 xnn_params.f32.gemm = (struct gemm_parameters) {
Marat Dukhan436ebe62019-12-04 15:10:12 -08001144 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x4__wasm,
1145 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x4__wasm,
1146 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__wasm,
1147 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__wasm,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001148 .mr = 4,
1149 .nr = 4,
1150 };
1151 }
1152 xnn_params.f32.gemm2 = (struct gemm_parameters) {
1153 .gemm = NULL,
Marat Dukhan436ebe62019-12-04 15:10:12 -08001154 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__wasm,
XNNPACK Teamb455b122019-09-27 18:10:33 -07001155 .mr = 4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001156 .nr = 2,
XNNPACK Teamb455b122019-09-27 18:10:33 -07001157 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001158 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
Marat Dukhan436ebe62019-12-04 15:10:12 -08001159 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x4__wasm_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001160 .cr = 1,
1161 .mr = 4,
1162 };
1163 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
Marat Dukhan436ebe62019-12-04 15:10:12 -08001164 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x9__wasm_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001165 .cr = 1,
1166 .mr = 9,
1167 };
1168 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
Marat Dukhan436ebe62019-12-04 15:10:12 -08001169 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x25__wasm_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001170 .cr = 1,
1171 .mr = 25,
1172 };
1173 xnn_params.f32.avgpool = (struct avgpool_parameters) {
Marat Dukhan436ebe62019-12-04 15:10:12 -08001174 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__wasm,
1175 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__wasm,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001176 .mr = 9,
1177 .qr = 8,
1178 };
1179 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
Marat Dukhan436ebe62019-12-04 15:10:12 -08001180 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__wasm,
1181 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__wasm,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001182 .mr = 9,
1183 .qr = 8,
1184 };
1185 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
Marat Dukhan436ebe62019-12-04 15:10:12 -08001186 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__wasm,
1187 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__wasm,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001188 .mr = 7,
1189 };
1190 xnn_params.f32.maxpool = (struct maxpool_parameters) {
Marat Dukhan436ebe62019-12-04 15:10:12 -08001191 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8x__wasm_c1,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001192 .mr = 9,
1193 .qr = 8,
1194 };
1195 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -08001196 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_4x__scalar_c1,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001197 .mr = 4,
1198 };
1199 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -08001200 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_9x__scalar_c1,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001201 .mr = 9,
1202 };
1203 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -08001204 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001205 .mr = 9,
1206 .qr = 8,
1207 };
Marat Dukhan69722492019-11-11 19:55:50 -08001208 xnn_params.f32.bilinear = (struct bilinear_parameters) {
1209 .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__scalar_c2,
1210 .pixel_tile = 1,
1211 .channel_tile = 2,
1212 };
Marat Dukhan436ebe62019-12-04 15:10:12 -08001213 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__wasm;
1214 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__wasm;
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001215 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan436ebe62019-12-04 15:10:12 -08001216 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__wasm_2x4,
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001217 .row_tile = 4,
1218 .channel_tile = 4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001219 };
Marat Dukhanb1a0fc32019-12-02 19:32:02 -08001220 xnn_params.f32.vadd = (struct vbinary_parameters) {
Marat Dukhan436ebe62019-12-04 15:10:12 -08001221 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_ukernel__wasm_x4,
1222 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__wasm_x4,
1223 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__wasm_x4,
Marat Dukhanb1a0fc32019-12-02 19:32:02 -08001224 .element_tile = 8,
1225 };
Marat Dukhan1e782c42019-11-21 17:02:40 -08001226 xnn_params.f32.vmul = (struct vbinary_parameters) {
Marat Dukhan436ebe62019-12-04 15:10:12 -08001227 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_ukernel__wasm_x4,
1228 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__wasm_x4,
1229 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__wasm_x4,
Marat Dukhanca2733c2019-11-15 23:21:17 -08001230 .element_tile = 8,
1231 };
Marat Dukhan05f3f6d2019-12-03 15:13:53 -08001232 xnn_params.f32.vsub = (struct vbinary_parameters) {
Marat Dukhan436ebe62019-12-04 15:10:12 -08001233 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_ukernel__wasm_x4,
1234 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_ukernel__wasm_x4,
1235 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_ukernel__wasm_x4,
Marat Dukhan05f3f6d2019-12-03 15:13:53 -08001236 .element_tile = 8,
1237 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001238 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan436ebe62019-12-04 15:10:12 -08001239 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c1__wasm_2x,
Marat Dukhan49e6ee92019-11-06 15:55:29 -08001240 .channel_tile = 1,
1241 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001242 };
Marat Dukhanefc47b82019-11-18 09:25:38 -08001243 #ifndef XNN_NO_NCHW_OPERATORS
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001244 xnn_params.f32.spmm = (struct spmm_parameters) {
Marat Dukhanbff791e2019-10-24 11:05:37 -07001245 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x1__scalar,
1246 .mr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001247 .nr = 1,
1248 };
Erich Elsenc6afd9b2019-10-24 16:10:53 -07001249 xnn_params.f32.spmm2 = (struct spmm_parameters) {
1250 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x2__scalar,
1251 .mr = 8,
1252 .nr = 2,
1253 };
1254 xnn_params.f32.spmm4 = (struct spmm_parameters) {
1255 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x4__scalar,
1256 .mr = 8,
1257 .nr = 4,
1258 };
Marat Dukhan14fe0b22019-10-23 21:20:07 -07001259 xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
1260 .ukernel_with_symm_padding =
1261 (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1,
1262 .output_channel_tile = 4,
1263 .output_height_tile = 1,
1264 .output_width_tile = 1,
1265 };
1266 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
1267 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar,
1268 .input_width_tile = 1,
1269 .output_width_tile = 1,
1270 .output_height_tile = 1,
1271 };
1272 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
1273 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar,
1274 .input_width_tile = 1,
1275 .output_width_tile = 1,
1276 .output_height_tile = 1,
1277 };
Marat Dukhana99918a2019-11-15 14:40:12 -08001278 xnn_params.f32.spchw_dwconv5x5 = (struct spchw_dwconv_parameters) {
1279 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar,
1280 .input_width_tile = 1,
1281 .output_width_tile = 1,
1282 .output_height_tile = 1,
1283 };
1284 xnn_params.f32.spchw_dwconv5x5s2 = (struct spchw_dwconv_parameters) {
1285 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar,
1286 .input_width_tile = 1,
1287 .output_width_tile = 1,
1288 .output_height_tile = 1,
1289 };
Marat Dukhan14fe0b22019-10-23 21:20:07 -07001290 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
1291 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__scalar_x1,
1292 .channel_tile = 1,
1293 };
Marat Dukhanefc47b82019-11-18 09:25:38 -08001294 #endif // XNN_NO_NCHW_OPERATORS
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001295 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001296
1297 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001298 #ifndef XNN_NO_X32_OPERATORS
1299 xnn_params.x32.pad = (struct pad_parameters) {
1300 .ukernel = xnn_x32_pad_x2__scalar,
1301 .mr = 2,
1302 };
1303 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__scalar;
1304 xnn_params.x32.zip = (struct zip_parameters) {
1305 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__scalar,
1306 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__scalar,
1307 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__scalar,
1308 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__scalar,
1309 };
1310 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001311
1312#else
1313 #error "Unsupported architecture"
1314#endif
1315 xnn_params.initialized = true;
1316}
1317
Marat Dukhan04f03be2019-11-19 12:36:47 -08001318enum xnn_status xnn_initialize(const struct xnn_allocator* allocator) {
Marat Dukhand343c222019-10-07 09:22:14 -07001319 #ifndef __EMSCRIPTEN__
1320 if (!cpuinfo_initialize()) {
1321 return xnn_status_out_of_memory;
1322 }
1323 #endif
XNNPACK Teamb455b122019-09-27 18:10:33 -07001324 pthread_once(&init_guard, &init);
1325 if (xnn_params.initialized) {
Marat Dukhan04f03be2019-11-19 12:36:47 -08001326 if (allocator != NULL) {
1327 memcpy(&xnn_params.allocator, allocator, sizeof(struct xnn_allocator));
1328 } else {
1329 xnn_params.allocator.allocate = &xnn_allocate;
1330 xnn_params.allocator.reallocate = &xnn_reallocate;
1331 xnn_params.allocator.deallocate = &xnn_deallocate;
1332 xnn_params.allocator.aligned_allocate = &xnn_aligned_allocate;
1333 xnn_params.allocator.aligned_deallocate = &xnn_aligned_deallocate;
1334 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07001335 return xnn_status_success;
1336 } else {
1337 return xnn_status_unsupported_hardware;
1338 }
1339}
1340
1341enum xnn_status xnn_deinitialize(void) {
Marat Dukhand343c222019-10-07 09:22:14 -07001342 #ifndef __EMSCRIPTEN__
1343 cpuinfo_deinitialize();
1344 #endif
XNNPACK Teamb455b122019-09-27 18:10:33 -07001345 return xnn_status_success;
1346}