// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include <pthread.h>

#ifndef __EMSCRIPTEN__
  #include <cpuinfo.h>
#endif

#include <xnnpack.h>
#include <xnnpack/argmaxpool.h>
#include <xnnpack/avgpool.h>
#include <xnnpack/clamp.h>
#include <xnnpack/common.h>
#include <xnnpack/conv.h>
#include <xnnpack/dwconv.h>
#include <xnnpack/gavgpool.h>
#include <xnnpack/gemm.h>
#include <xnnpack/hswish.h>
#include <xnnpack/ibilinear.h>
#include <xnnpack/igemm.h>
#include <xnnpack/log.h>
#include <xnnpack/lut.h>
#include <xnnpack/maxpool.h>
#include <xnnpack/memory.h>
#include <xnnpack/pad.h>
#include <xnnpack/params.h>
#include <xnnpack/pavgpool.h>
#include <xnnpack/prelu.h>
#include <xnnpack/raddstoreexpminusmax.h>
#include <xnnpack/rmax.h>
#include <xnnpack/spmm.h>
#include <xnnpack/unpool.h>
#include <xnnpack/vadd.h>
#include <xnnpack/vbinary.h>
#include <xnnpack/vmulcaddc.h>
#include <xnnpack/vunary.h>
#include <xnnpack/zip.h>

#ifndef XNN_ENABLE_ASSEMBLY
  #define XNN_ENABLE_ASSEMBLY 1
#endif

static pthread_once_t init_guard = PTHREAD_ONCE_INIT;

struct xnn_parameters xnn_params = {
  .initialized = false
};

#if XNN_ARCH_ASMJS || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
  extern uint32_t xnn_stub_wasm_f32_sub(uint32_t a, uint32_t b);
#endif
#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
  extern uint32_t xnn_stub_wasm_f32_min(uint32_t a, uint32_t b);
#endif

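// init() fills the global xnn_params table with pointers to the microkernels
// selected for this build: one branch per architecture (#if XNN_ARCH_*), with
// further dispatch on the detected CPU microarchitecture or ISA features where
// that affects performance. It is presumably executed once, through
// pthread_once() with the init_guard declared above.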
static void init(void) {
#if XNN_ARCH_ARM
  if (!cpuinfo_has_arm_neon()) {
    xnn_log_error("XNNPACK initialization failed: NEON is not supported");
    return;
  }

  /**************************** Q8 micro-kernels ****************************/
  #ifndef XNN_NO_Q8_OPERATORS
    xnn_params.q8.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x8__neon,
      .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x8__neon,
      .mr = 4,
      .nr = 8,
    };

    #if XNN_ENABLE_ASSEMBLY
      xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
        .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__aarch32_neon,
        .cr = 8,
        .mr = 9,
      };
    #else
      xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
        .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
        .cr = 8,
        .mr = 9,
      };
    #endif
    xnn_params.q8.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_9x__neon_c8,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_9p8x__neon_c8,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_7x__neon_c8,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_7p7x__neon_c8,
      .mr = 7,
    };
    xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
  #endif  // XNN_NO_Q8_OPERATORS

  /**************************** U8 micro-kernels ****************************/
  #ifndef XNN_NO_U8_OPERATORS
    xnn_params.u8.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8x__neon_c16,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon_x64;
    xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
    xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
  #endif  // XNN_NO_U8_OPERATORS

  /**************************** X8 micro-kernels ****************************/
  #ifndef XNN_NO_X8_OPERATORS
    xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
    xnn_params.x8.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
      .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
      .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
      .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
    };
  #endif  // XNN_NO_X8_OPERATORS

  /**************************** F32 micro-kernels ****************************/
  #ifndef XNN_NO_F32_OPERATORS
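    // F32 GEMM/IGEMM microkernels are chosen per microarchitecture: the switch
    // below keys off the uarch that cpuinfo reports for core 0, and the tuned
    // AArch32 assembly kernels are only considered when XNN_ENABLE_ASSEMBLY is set.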
    #if XNN_ENABLE_ASSEMBLY
      switch (cpuinfo_get_core(0)->uarch) {
        case cpuinfo_uarch_cortex_a5:
        case cpuinfo_uarch_cortex_a7:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch32_neon_ld64,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch32_neon_ld64,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_lane_ld64,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_lane_ld64,
            .mr = 4,
            .nr = 8,
          };
          break;

        case cpuinfo_uarch_cortex_a53:
        case cpuinfo_uarch_cortex_a55r0:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a53,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_lane_ld64,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_lane_ld64,
            .mr = 4,
            .nr = 8,
          };
          break;

        case cpuinfo_uarch_cortex_a55:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_lane_ld64,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_lane_ld64,
            .mr = 4,
            .nr = 8,
          };
          break;

        case cpuinfo_uarch_cortex_a57:
        case cpuinfo_uarch_cortex_a72:
        case cpuinfo_uarch_cortex_a73:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch32_neon_pld_cortex_a75,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch32_neon_pld_cortex_a75,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_lane_ld64,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_lane_ld64,
            .mr = 4,
            .nr = 8,
          };
          break;

        case cpuinfo_uarch_krait:
        default:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_lane_ld64,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_lane_ld64,
            .mr = 4,
            .nr = 8,
          };
          break;
      }
    #else  // XNN_ENABLE_ASSEMBLY
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neon_lane_ld128,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neon_lane_ld128,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_lane_ld64,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_lane_ld64,
        .mr = 4,
        .nr = 8,
      };
    #endif  // XNN_ENABLE_ASSEMBLY
    xnn_params.f32.gemm2 = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x2__neon_lane_ld64,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neon_lane_ld64,
      .mr = 4,
      .nr = 2,
    };
    xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
      .cr = 4,
      .mr = 4,
    };
    xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neon,
      .cr = 4,
      .mr = 9,
    };
    xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
      .cr = 4,
      .mr = 25,
    };
    xnn_params.f32.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_9x__neon_c4,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_9p8x__neon_c4,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
      .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_9x__neon_c4,
      .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_9p8x__neon_c4,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_7x__neon_c4,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_7p7x__neon_c4,
      .mr = 7,
    };
    xnn_params.f32.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8x__neon_c4,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_4x__psimd_c4,
      .mr = 4,
    };
    xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_9x__psimd_c4,
      .mr = 9,
    };
    xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
      .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.ibilinear = (struct ibilinear_parameters) {
      .ukernel = (xnn_ibilinear_ukernel_function) xnn_f32_ibilinear_ukernel__neon_c8,
      .pixel_tile = 1,
      .channel_tile = 8,
    };
    xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon_x8;
    xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neon_x8;
    xnn_params.f32.sigmoid = (xnn_univector_ukernel_function) xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8;
    xnn_params.f32.prelu = (struct prelu_parameters) {
      .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__neon_2x8,
      .row_tile = 2,
      .channel_tile = 8,
    };
    xnn_params.f32.raddstoreexpminusmax = xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8;
    xnn_params.f32.rmax = xnn_f32_rmax_ukernel__neon;
    xnn_params.f32.vadd = (struct vbinary_parameters) {
      .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_ukernel__neon_x8,
      .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__neon_x8,
      .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__neon_x8,
      .element_tile = 8,
    };
    xnn_params.f32.vdiv = (struct vbinary_parameters) {
      .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_ukernel__scalar_x2,
      .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_ukernel__scalar_x2,
      .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_ukernel__scalar_x2,
      .element_tile = 2,
    };
    xnn_params.f32.vmax = (struct vbinary_parameters) {
      .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__neon_x8,
      .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__neon_x8,
      .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__neon_x8,
      .element_tile = 8,
    };
    xnn_params.f32.vmin = (struct vbinary_parameters) {
      .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmin_ukernel__neon_x8,
      .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__neon_x8,
      .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__neon_x8,
      .element_tile = 8,
    };
    xnn_params.f32.vmul = (struct vbinary_parameters) {
      .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_ukernel__neon_x8,
      .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
      .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
      .element_tile = 8,
    };
    xnn_params.f32.vsub = (struct vbinary_parameters) {
      .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_ukernel__neon_x8,
      .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_ukernel__neon_x8,
      .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_ukernel__neon_x8,
      .element_tile = 8,
    };
    xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
      .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neon_2x,
      .channel_tile = 4,
      .row_tile = 2,
    };
  #endif  // XNN_NO_F32_OPERATORS

  /**************************** X32 micro-kernels ****************************/
  #ifndef XNN_NO_X32_OPERATORS
    xnn_params.x32.pad = (struct pad_parameters) {
      .ukernel = xnn_x32_pad_x2__neon,
      .mr = 2,
    };
    xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
    xnn_params.x32.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
      .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
      .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
      .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
    };
  #endif  // XNN_NO_X32_OPERATORS

#elif XNN_ARCH_ARM64

  /**************************** Q8 micro-kernels ****************************/
  #ifndef XNN_NO_Q8_OPERATORS
    xnn_params.q8.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_8x8__neon,
      .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_8x8__neon,
      .mr = 8,
      .nr = 8,
    };
    xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
      .cr = 8,
      .mr = 9,
    };
    xnn_params.q8.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_9x__neon_c8,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_9p8x__neon_c8,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_7x__neon_c8,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_7p7x__neon_c8,
      .mr = 7,
    };
    xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
  #endif  // XNN_NO_Q8_OPERATORS

  /**************************** U8 micro-kernels ****************************/
  #ifndef XNN_NO_U8_OPERATORS
    xnn_params.u8.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8x__neon_c16,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon_x64;
    xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
    xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
  #endif  // XNN_NO_U8_OPERATORS

  /**************************** X8 micro-kernels ****************************/
  #ifndef XNN_NO_X8_OPERATORS
    xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
    xnn_params.x8.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
      .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
      .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
      .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
    };
  #endif  // XNN_NO_X8_OPERATORS

  /**************************** F32 micro-kernels ****************************/
  #ifndef XNN_NO_F32_OPERATORS
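    // On iOS builds the per-core uarch query is skipped and a single GEMM
    // configuration is used; elsewhere the uarch of core 0 selects a tuned
    // AArch64 assembly kernel when XNN_ENABLE_ASSEMBLY is enabled.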
    #if XNN_PLATFORM_IOS
      #if XNN_ENABLE_ASSEMBLY
        xnn_params.f32.gemm = (struct gemm_parameters) {
          .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_ios,
          .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_ios,
          .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
          .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
          .mr = 6,
          .nr = 8,
        };
      #else  // !XNN_ENABLE_ASSEMBLY
        xnn_params.f32.gemm = (struct gemm_parameters) {
          .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__neonfma_lane_ld64,
          .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__neonfma_lane_ld64,
          .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neonfma_lane_ld64,
          .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neonfma_lane_ld64,
          .mr = 6,
          .nr = 8,
        };
      #endif  // XNN_ENABLE_ASSEMBLY
    #else  // !XNN_PLATFORM_IOS
      #if XNN_ENABLE_ASSEMBLY
        switch (cpuinfo_get_core(0)->uarch) {
          case cpuinfo_uarch_cortex_a57:
            xnn_params.f32.gemm = (struct gemm_parameters) {
              .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
              .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
              .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
              .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
              .mr = 6,
              .nr = 8,
            };
            break;
          case cpuinfo_uarch_cortex_a72:
            xnn_params.f32.gemm = (struct gemm_parameters) {
              .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
              .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
              .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
              .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
              .mr = 4,
              .nr = 8,
            };
            break;
          case cpuinfo_uarch_cortex_a75:
          case cpuinfo_uarch_cortex_a76:
          case cpuinfo_uarch_exynos_m3:
          case cpuinfo_uarch_exynos_m4:
            xnn_params.f32.gemm = (struct gemm_parameters) {
              .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
              .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
              .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
              .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
              .mr = 6,
              .nr = 8,
            };
            break;
          case cpuinfo_uarch_exynos_m1:
          case cpuinfo_uarch_exynos_m2:
            xnn_params.f32.gemm = (struct gemm_parameters) {
              .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8s4__neonfma,
              .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8s4__neonfma,
              .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8s4__neonfma,
              .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__neonfma,
              .mr = 6,
              .nr = 8,
              .log2_sr = 2,
            };
            break;

          case cpuinfo_uarch_cortex_a53:
          case cpuinfo_uarch_cortex_a55r0:
            xnn_params.f32.gemm = (struct gemm_parameters) {
              .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a53,
              .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a53,
              .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a53,
              .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a53,
              .mr = 6,
              .nr = 8,
            };
            break;
          case cpuinfo_uarch_cortex_a55:
            xnn_params.f32.gemm = (struct gemm_parameters) {
              .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a55,
              .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a55,
              .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a53,
              .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a53,
              .mr = 6,
              .nr = 8,
            };
            break;
          case cpuinfo_uarch_cortex_a73:
            xnn_params.f32.gemm = (struct gemm_parameters) {
              .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
              .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
              .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
              .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
              .mr = 6,
              .nr = 8,
            };
            break;
          default:
          case cpuinfo_uarch_cortex_a77:
          case cpuinfo_uarch_exynos_m5:
          case cpuinfo_uarch_kryo:
            xnn_params.f32.gemm = (struct gemm_parameters) {
              .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a57,
              .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a57,
              .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
              .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
              .mr = 4,
              .nr = 8,
            };
            break;
        }
      #else  // !XNN_ENABLE_ASSEMBLY
        xnn_params.f32.gemm = (struct gemm_parameters) {
          .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__neonfma_lane_ld64,
          .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__neonfma_lane_ld64,
          .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neonfma_lane_ld64,
          .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neonfma_lane_ld64,
          .mr = 6,
          .nr = 8,
        };
      #endif  // XNN_ENABLE_ASSEMBLY
    #endif  // XNN_PLATFORM_IOS
    xnn_params.f32.gemm2 = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x2__neonfma_lane_ld64,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64,
      .mr = 4,
      .nr = 2,
    };
    xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
      .cr = 4,
      .mr = 4,
    };
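    // The 9-tap (3x3) F32 depthwise convolution microkernel is selected per
    // microarchitecture on non-iOS builds; the other tap counts use a fixed choice.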
    #if XNN_PLATFORM_IOS
      xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
        .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__neonfma,
        .cr = 8,
        .mr = 9,
      };
    #else  // !XNN_PLATFORM_IOS
      switch (cpuinfo_get_core(0)->uarch) {
        case cpuinfo_uarch_kryo:
          xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
            .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neonfma,
            .cr = 4,
            .mr = 9,
          };
          break;
        #if XNN_ENABLE_ASSEMBLY
          case cpuinfo_uarch_cortex_a53:
          case cpuinfo_uarch_cortex_a55r0:
          case cpuinfo_uarch_cortex_a55:
            xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
              .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55,
              .cr = 4,
              .mr = 9,
            };
            break;
        #endif  // XNN_ENABLE_ASSEMBLY
        default:
          xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
            .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__neonfma,
            .cr = 8,
            .mr = 9,
          };
          break;
      }
    #endif  // XNN_PLATFORM_IOS
    xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
      .cr = 4,
      .mr = 25,
    };
    xnn_params.f32.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_9x__neon_c4,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_9p8x__neon_c4,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
      .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_9x__neon_c4,
      .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_9p8x__neon_c4,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_7x__neon_c4,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_7p7x__neon_c4,
      .mr = 7,
    };
    xnn_params.f32.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8x__neon_c4,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_4x__psimd_c4,
      .mr = 4,
    };
    xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_9x__psimd_c4,
      .mr = 9,
    };
    xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
      .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.ibilinear = (struct ibilinear_parameters) {
      .ukernel = (xnn_ibilinear_ukernel_function) xnn_f32_ibilinear_ukernel__neonfma_c8,
      .pixel_tile = 1,
      .channel_tile = 8,
    };
    xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon_x8;
    xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neonfma_x8;
    xnn_params.f32.sigmoid = (xnn_univector_ukernel_function) xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x16;
    xnn_params.f32.prelu = (struct prelu_parameters) {
      .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__neon_2x8,
      .row_tile = 2,
      .channel_tile = 8,
    };
    xnn_params.f32.raddstoreexpminusmax = xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16;
    xnn_params.f32.rmax = xnn_f32_rmax_ukernel__neon;
    xnn_params.f32.vadd = (struct vbinary_parameters) {
      .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_ukernel__neon_x8,
      .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__neon_x8,
      .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__neon_x8,
      .element_tile = 8,
    };
    xnn_params.f32.vdiv = (struct vbinary_parameters) {
      .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_ukernel__neon_x8,
      .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_ukernel__neon_x8,
      .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_ukernel__neon_x8,
      .element_tile = 8,
    };
    xnn_params.f32.vmax = (struct vbinary_parameters) {
      .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__neon_x8,
      .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__neon_x8,
      .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__neon_x8,
      .element_tile = 8,
    };
    xnn_params.f32.vmin = (struct vbinary_parameters) {
      .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmin_ukernel__neon_x8,
      .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__neon_x8,
      .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__neon_x8,
      .element_tile = 8,
    };
    xnn_params.f32.vmul = (struct vbinary_parameters) {
      .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_ukernel__neon_x8,
      .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
      .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
      .element_tile = 8,
    };
    xnn_params.f32.vsub = (struct vbinary_parameters) {
      .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_ukernel__neon_x8,
      .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_ukernel__neon_x8,
      .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_ukernel__neon_x8,
      .element_tile = 8,
    };
    xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
      .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neonfma_2x,
      .channel_tile = 4,
      .row_tile = 2,
    };
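    // Sparse (SpMM) and CHW-layout (spchw) microkernels back the NCHW operator
    // path and are only configured when XNN_NO_NCHW_OPERATORS is not defined.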
    #ifndef XNN_NO_NCHW_OPERATORS
      xnn_params.f32.spmm = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x1__neonfma_pipelined,
        .mr = 16,
        .nr = 1,
      };
      xnn_params.f32.spmm2 = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x2__neonfma,
        .mr = 16,
        .nr = 2,
      };
      xnn_params.f32.spmm4 = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x4__neonfma,
        .mr = 16,
        .nr = 4,
      };
      xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
        .ukernel_with_symm_padding =
          (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2,
        .output_channel_tile = 4,
        .output_height_tile = 2,
        .output_width_tile = 2,
      };
      xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma,
        .input_width_tile = 4,
        .output_width_tile = 4,
        .output_height_tile = 3,
      };
      xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma,
        .input_width_tile = 4,
        .output_width_tile = 4,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_dwconv5x5 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma,
        .input_width_tile = 4,
        .output_width_tile = 4,
        .output_height_tile = 3,
      };
      xnn_params.f32.spchw_dwconv5x5s2 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma,
        .input_width_tile = 4,
        .output_width_tile = 4,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
        .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__neon_x4,
        .channel_tile = 4,
      };
    #endif  // XNN_NO_NCHW_OPERATORS
  #endif  // XNN_NO_F32_OPERATORS

  /**************************** X32 micro-kernels ****************************/
  #ifndef XNN_NO_X32_OPERATORS
    xnn_params.x32.pad = (struct pad_parameters) {
      .ukernel = xnn_x32_pad_x2__neon,
      .mr = 2,
    };
    xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
    xnn_params.x32.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
      .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
      .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
      .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
    };
  #endif  // XNN_NO_X32_OPERATORS

#elif XNN_ARCH_X86 || XNN_ARCH_X86_64
  if (!cpuinfo_has_x86_sse2()) {
    xnn_log_error("XNNPACK initialization failed: SSE2 is not supported");
    return;
  }

  /**************************** Q8 micro-kernels ****************************/
  #ifndef XNN_NO_Q8_OPERATORS
    xnn_params.q8.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x4c2__sse2,
      .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x4c2__sse2,
      .mr = 4,
      .nr = 4,
      .log2_kr = 1,
    };
    xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__sse2,
      .cr = 8,
      .mr = 9,
    };
    xnn_params.q8.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_9x__sse2_c8,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_9p8x__sse2_c8,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_7x__sse2_c8,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_7p7x__sse2_c8,
      .mr = 7,
    };
    xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__sse2;
  #endif  // XNN_NO_Q8_OPERATORS

  /**************************** U8 micro-kernels ****************************/
  #ifndef XNN_NO_U8_OPERATORS
    xnn_params.u8.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8x__sse2_c16,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__sse2_x64;
    xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
    xnn_params.u8.rmax = xnn_u8_rmax_ukernel__sse2;
  #endif  // XNN_NO_U8_OPERATORS

  /**************************** X8 micro-kernels ****************************/
  #ifndef XNN_NO_X8_OPERATORS
    xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
    xnn_params.x8.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__sse2,
      .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__sse2,
      .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__sse2,
      .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__sse2,
    };
  #endif  // XNN_NO_X8_OPERATORS

  /**************************** F32 micro-kernels ****************************/
  #ifndef XNN_NO_F32_OPERATORS
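    // x86/x86-64 F32 kernels dispatch on ISA features at runtime, preferring
    // AVX512F, then FMA3 (with a Zen/Dhyana-specific GEMM shape), then AVX, and
    // finally the SSE baseline; the wider-vector paths are skipped on
    // XNN_PLATFORM_MOBILE builds.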
Marat Dukhan0f349c42019-11-27 11:58:54 -0800785 if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {
786 xnn_params.f32.gemm = (struct gemm_parameters) {
787 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_7x16__avx512f_broadcast,
788 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_7x16__avx512f_broadcast,
789 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x16__avx512f_broadcast,
790 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x16__avx512f_broadcast,
791 .mr = 7,
792 .nr = 16,
793 };
794 } else if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_fma3()) {
Marat Dukhan27121322019-12-09 14:57:40 -0800795 switch (cpuinfo_get_core(0)->uarch) {
796 case cpuinfo_uarch_zen:
Marat Dukhanb3801eb2020-03-12 13:41:11 -0700797 case cpuinfo_uarch_dhyana:
Marat Dukhan27121322019-12-09 14:57:40 -0800798 xnn_params.f32.gemm = (struct gemm_parameters) {
799 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x16s4__fma3_broadcast,
800 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x16s4__fma3_broadcast,
801 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast,
802 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast,
803 .mr = 4,
804 .nr = 16,
805 .log2_sr = 2,
806 };
807 break;
808 default:
809 xnn_params.f32.gemm = (struct gemm_parameters) {
810 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_5x16__fma3_broadcast,
811 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_5x16__fma3_broadcast,
812 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x16__fma3_broadcast,
813 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x16__fma3_broadcast,
814 .mr = 5,
815 .nr = 16,
816 };
817 break;
818 }
Marat Dukhan1025ea32019-11-21 16:01:08 -0800819 } else if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx()) {
820 xnn_params.f32.gemm = (struct gemm_parameters) {
Marat Dukhaneccfd712019-12-08 16:49:27 -0800821 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_5x16__avx_broadcast,
822 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_5x16__avx_broadcast,
823 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x16__avx_broadcast,
824 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x16__avx_broadcast,
825 .mr = 5,
826 .nr = 16,
Marat Dukhan1025ea32019-11-21 16:01:08 -0800827 };
828 } else {
829 xnn_params.f32.gemm = (struct gemm_parameters) {
830 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__sse_load1,
831 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__sse_load1,
832 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__sse_load1,
833 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__sse_load1,
834 .mr = 4,
835 .nr = 8,
836 };
837 }
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700838 xnn_params.f32.gemm2 = (struct gemm_parameters) {
Marat Dukhan29954272020-02-13 17:56:11 -0800839 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x2c4__sse,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700840 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__sse,
841 .mr = 4,
842 .nr = 2,
843 .log2_kr = 2,
844 };
Marat Dukhan479f87e2019-11-27 15:17:06 -0800845 if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {
846 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
847 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up16x4__avx512f,
848 .cr = 16,
849 .mr = 4,
850 };
851 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
852 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up16x9__avx512f,
853 .cr = 16,
854 .mr = 9,
855 };
856 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
857 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up16x25__avx512f,
858 .cr = 16,
859 .mr = 25,
860 };
861 } else if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_fma3()) {
Marat Dukhan17ec5f32019-11-22 13:34:16 -0800862 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
863 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up16x4__fma3,
864 .cr = 16,
865 .mr = 4,
866 };
867 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
868 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up16x9__fma3,
869 .cr = 16,
870 .mr = 9,
871 };
872 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
873 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x25__fma3,
874 .cr = 8,
875 .mr = 25,
876 };
877 } else if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx()) {
878 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
879 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up16x4__avx,
880 .cr = 16,
881 .mr = 4,
882 };
883 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
884 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up16x9__avx,
885 .cr = 16,
886 .mr = 9,
887 };
888 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
889 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x25__avx,
890 .cr = 8,
891 .mr = 25,
892 };
893 } else {
894 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
895 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x4__sse,
896 .cr = 8,
897 .mr = 4,
898 };
899 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
900 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__sse,
901 .cr = 8,
902 .mr = 9,
903 };
904 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
905 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x25__sse,
906 .cr = 8,
907 .mr = 25,
908 };
909 }
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700910 xnn_params.f32.avgpool = (struct avgpool_parameters) {
Marat Dukhan6ee435a2020-02-26 22:33:38 -0800911 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_9x__sse_c4,
912 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_9p8x__sse_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700913 .mr = 9,
914 .qr = 8,
915 };
916 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
Marat Dukhan6ee435a2020-02-26 22:33:38 -0800917 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_9x__sse_c4,
918 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_9p8x__sse_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700919 .mr = 9,
920 .qr = 8,
921 };
922 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
Marat Dukhana63a6fc2020-03-10 06:12:48 -0700923 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_7x__sse_c4,
924 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_7p7x__sse_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700925 .mr = 7,
926 };
927 xnn_params.f32.maxpool = (struct maxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800928 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8x__sse_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700929 .mr = 9,
930 .qr = 8,
931 };
932 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800933 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_4x__sse2_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700934 .mr = 4,
935 };
936 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800937 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_9x__sse2_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700938 .mr = 9,
939 };
940 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -0800941 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700942 .mr = 9,
943 .qr = 8,
944 };
Marat Dukhan660fd192020-03-10 04:55:30 -0700945 xnn_params.f32.ibilinear = (struct ibilinear_parameters) {
946 .ukernel = (xnn_ibilinear_ukernel_function) xnn_f32_ibilinear_ukernel__sse_c8,
Marat Dukhan69722492019-11-11 19:55:50 -0800947 .pixel_tile = 1,
948 .channel_tile = 8,
949 };
Marat Dukhane2c3f292019-11-27 15:40:54 -0800950 if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {
Marat Dukhan5c5fa962020-03-10 18:38:33 -0700951 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__avx512f_x16;
Marat Dukhane2c3f292019-11-27 15:40:54 -0800952 } else if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx()) {
Marat Dukhan5c5fa962020-03-10 18:38:33 -0700953 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__avx_x16;
Marat Dukhane2c3f292019-11-27 15:40:54 -0800954 } else {
Marat Dukhan5c5fa962020-03-10 18:38:33 -0700955 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__sse_x8;
Marat Dukhane2c3f292019-11-27 15:40:54 -0800956 }
Marat Dukhan662faa02019-12-09 22:48:16 -0800957 if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {
Marat Dukhan5c5fa962020-03-10 18:38:33 -0700958 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__avx512f_x16;
Marat Dukhan662faa02019-12-09 22:48:16 -0800959 } else if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_fma3()) {
960 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__fma3_x16;
961 } else if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx()) {
962 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__avx_x16;
963 } else {
964 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__sse_x8;
965 }
Marat Dukhanfa0a4322020-01-06 16:14:29 -0800966 if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx2()) {
967 xnn_params.f32.sigmoid = (xnn_univector_ukernel_function) xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x40;
968 } else {
969 xnn_params.f32.sigmoid = (xnn_univector_ukernel_function) xnn_f32_sigmoid_ukernel__sse2_p5_div_x16;
970 }
Marat Dukhan90eca0a2020-03-11 00:52:23 -0700971 if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {
972 xnn_params.f32.prelu = (struct prelu_parameters) {
973 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__avx512f_2x16,
974 .row_tile = 2,
975 .channel_tile = 16,
976 };
977 } else if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx()) {
978 xnn_params.f32.prelu = (struct prelu_parameters) {
979 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__avx_2x16,
980 .row_tile = 2,
981 .channel_tile = 16,
982 };
983 } else {
984 xnn_params.f32.prelu = (struct prelu_parameters) {
985 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__sse2_2x8,
986 .row_tile = 2,
987 .channel_tile = 8,
988 };
989 }
Marat Dukhan1edc4542020-01-27 12:40:13 -0800990 xnn_params.f32.raddstoreexpminusmax = xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2;
991 xnn_params.f32.rmax = xnn_f32_rmax_ukernel__sse;
Marat Dukhan9a88efe2019-12-10 15:54:24 -0800992 if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {
993 xnn_params.f32.vadd = (struct vbinary_parameters) {
994 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_ukernel__avx512f_x32,
995 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__avx512f_x32,
996 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__avx512f_x32,
997 .element_tile = 32,
998 };
999 xnn_params.f32.vdiv = (struct vbinary_parameters) {
1000 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_ukernel__avx512f_x32,
1001 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_ukernel__avx512f_x32,
1002 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_ukernel__avx512f_x32,
1003 .element_tile = 32,
1004 };
1005 xnn_params.f32.vmax = (struct vbinary_parameters) {
1006 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__avx512f_x32,
1007 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__avx512f_x32,
1008 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__avx512f_x32,
1009 .element_tile = 32,
1010 };
1011 xnn_params.f32.vmin = (struct vbinary_parameters) {
1012 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmin_ukernel__avx512f_x32,
1013 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__avx512f_x32,
1014 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__avx512f_x32,
1015 .element_tile = 32,
1016 };
1017 xnn_params.f32.vmul = (struct vbinary_parameters) {
1018 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_ukernel__avx512f_x32,
1019 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__avx512f_x32,
1020 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__avx512f_x32,
1021 .element_tile = 32,
1022 };
1023 xnn_params.f32.vsub = (struct vbinary_parameters) {
1024 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_ukernel__avx512f_x32,
1025 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_ukernel__avx512f_x32,
1026 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_ukernel__avx512f_x32,
1027 .element_tile = 32,
1028 };
1029 } else if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx()) {
1030 xnn_params.f32.vadd = (struct vbinary_parameters) {
1031 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_ukernel__avx_x16,
1032 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__avx_x16,
1033 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__avx_x16,
1034 .element_tile = 16,
1035 };
1036 xnn_params.f32.vdiv = (struct vbinary_parameters) {
1037 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_ukernel__avx_x16,
1038 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_ukernel__avx_x16,
1039 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_ukernel__avx_x16,
1040 .element_tile = 16,
1041 };
1042 xnn_params.f32.vmax = (struct vbinary_parameters) {
1043 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__avx_x16,
1044 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__avx_x16,
1045 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__avx_x16,
1046 .element_tile = 16,
1047 };
1048 xnn_params.f32.vmin = (struct vbinary_parameters) {
1049 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmin_ukernel__avx_x16,
1050 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__avx_x16,
1051 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__avx_x16,
1052 .element_tile = 16,
1053 };
1054 xnn_params.f32.vmul = (struct vbinary_parameters) {
1055 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_ukernel__avx_x16,
1056 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__avx_x16,
1057 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__avx_x16,
1058 .element_tile = 16,
1059 };
1060 xnn_params.f32.vsub = (struct vbinary_parameters) {
1061 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_ukernel__avx_x16,
1062 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_ukernel__avx_x16,
1063 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_ukernel__avx_x16,
1064 .element_tile = 16,
1065 };
1066 } else {
1067 xnn_params.f32.vadd = (struct vbinary_parameters) {
1068 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_ukernel__sse_x8,
1069 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__sse_x8,
1070 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__sse_x8,
1071 .element_tile = 8,
1072 };
1073 xnn_params.f32.vdiv = (struct vbinary_parameters) {
1074 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_ukernel__sse_x8,
1075 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_ukernel__sse_x8,
1076 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_ukernel__sse_x8,
1077 .element_tile = 8,
1078 };
1079 xnn_params.f32.vmax = (struct vbinary_parameters) {
1080 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__sse_x8,
1081 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__sse_x8,
1082 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__sse_x8,
1083 .element_tile = 8,
1084 };
1085 xnn_params.f32.vmin = (struct vbinary_parameters) {
1086 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmin_ukernel__sse_x8,
1087 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__sse_x8,
1088 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__sse_x8,
1089 .element_tile = 8,
1090 };
1091 xnn_params.f32.vmul = (struct vbinary_parameters) {
1092 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_ukernel__sse_x8,
1093 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__sse_x8,
1094 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__sse_x8,
1095 .element_tile = 8,
1096 };
1097 xnn_params.f32.vsub = (struct vbinary_parameters) {
1098 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_ukernel__sse_x8,
1099 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_ukernel__sse_x8,
1100 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_ukernel__sse_x8,
1101 .element_tile = 8,
1102 };
1103 }
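    // vmulcaddc fuses a per-channel multiply and add (roughly y[c] = x[c] * scale[c] + bias[c]),
    // handling channel_tile channels across row_tile rows per microkernel call.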
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001104 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -08001105 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__sse_2x,
1106 .channel_tile = 4,
1107 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001108 };
Marat Dukhanefc47b82019-11-18 09:25:38 -08001109 #ifndef XNN_NO_NCHW_OPERATORS
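    // The kernels below back the NCHW (CHW-layout) operator path: sparse GEMM (spmm),
    // direct 3x3 depthwise convolutions on CHW data, and a CHW global average pooling.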
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001110 xnn_params.f32.spmm = (struct spmm_parameters) {
1111 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__sse,
1112 .mr = 4,
1113 .nr = 1,
1114 };
1115 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
1116 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__sse,
1117 .input_width_tile = 4,
1118 .output_width_tile = 4,
1119 .output_height_tile = 1,
1120 };
1121 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
1122 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse,
1123 .input_width_tile = 4,
1124 .output_width_tile = 4,
1125 .output_height_tile = 1,
1126 };
1127 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
1128 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__sse_x4,
1129 .channel_tile = 4,
1130 };
Marat Dukhanefc47b82019-11-18 09:25:38 -08001131 #endif // XNN_NO_NCHW_OPERATORS
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001132 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001133
1134 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001135 #ifndef XNN_NO_X32_OPERATORS
1136 xnn_params.x32.pad = (struct pad_parameters) {
1137 .ukernel = xnn_x32_pad_x2__sse2,
1138 .mr = 2,
1139 };
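      // Note: unpool uses the portable psimd microkernel here, unlike the SSE2-specific kernels around it.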
1140 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
1141 xnn_params.x32.zip = (struct zip_parameters) {
1142 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__sse2,
1143 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__sse2,
1144 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__sse2,
1145 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__sse2,
1146 };
1147 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001148
Marat Dukhanf42facc2020-03-08 15:14:53 -07001149#elif XNN_ARCH_WASMSIMD
Marat Dukhan466b5232019-10-09 11:22:20 -07001150 // Unlike most other architectures, on x86/x86-64 a floating-point instruction that produces
1151 // a NaN output from non-NaN arguments sets the sign bit of the generated NaN.
1152 // We use this to distinguish x86/x86-64 from other architectures: subtracting two infinities
1153 // must produce NaN per the IEEE 754 standard, and we then check the sign of the result below.
1154 static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
1155 const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;
1156
XNNPACK Teamb455b122019-09-27 18:10:33 -07001157 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001158 #ifndef XNN_NO_Q8_OPERATORS
1159 xnn_params.q8.gemm = (struct gemm_parameters) {
1160 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
1161 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
1162 .mr = 2,
1163 .nr = 2,
1164 };
1165 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
1166 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
1167 .cr = 1,
1168 .mr = 9,
1169 };
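      // Pooling convention: the unipass (up) kernel covers windows of up to mr elements, while the
      // multipass (mp) kernel covers mr elements on the first pass and qr more on each extra pass
      // (e.g. 9p8x handles 9 + 8*k pooling elements).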
1170 xnn_params.q8.avgpool = (struct avgpool_parameters) {
Marat Dukhan6ee435a2020-02-26 22:33:38 -08001171 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_9x__scalar_c1,
1172 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_9p8x__scalar_c1,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001173 .mr = 9,
1174 .qr = 8,
1175 };
1176 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
Marat Dukhana63a6fc2020-03-10 06:12:48 -07001177 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_7x__scalar_c1,
1178 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_7p7x__scalar_c1,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001179 .mr = 7,
1180 };
1181 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
1182 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001183
1184 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001185 #ifndef XNN_NO_U8_OPERATORS
1186 xnn_params.u8.maxpool = (struct maxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -08001187 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8x__scalar_c1,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001188 .mr = 9,
1189 .qr = 8,
1190 };
Marat Dukhan5c5fa962020-03-10 18:38:33 -07001191 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar_x4;
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001192 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
1193 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
1194 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001195
1196 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001197 #ifndef XNN_NO_X8_OPERATORS
1198 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
1199 xnn_params.x8.zip = (struct zip_parameters) {
1200 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
1201 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
1202 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
1203 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
1204 };
1205 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001206
1207 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001208 #ifndef XNN_NO_F32_OPERATORS
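    // GEMM tile shapes differ depending on whether the WAsm engine runs on an x86 host (detected
    // above): mr x nr is the output tile of the microkernel, gemm1/igemm1 are single-row (mr = 1)
    // variants, and log2_kr/log2_sr describe the k-unrolling / shuffle factor baked into the
    // packed weights (e.g. 4x2c4 -> log2_kr = 2, 6x8s4 -> log2_sr = 2).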
1209 if (is_wasm_x86) {
1210 xnn_params.f32.gemm = (struct gemm_parameters) {
Marat Dukhancb801972019-10-23 02:10:33 -07001211 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__psimd_splat,
1212 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__psimd_splat,
1213 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__psimd_splat,
1214 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__psimd_splat,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001215 .mr = 4,
1216 .nr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001217 };
1218 } else {
1219 xnn_params.f32.gemm = (struct gemm_parameters) {
Marat Dukhancd945c62019-10-25 11:59:50 -07001220 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8s4__psimd,
1221 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8s4__psimd,
Marat Dukhan7353eea2020-02-18 16:04:05 -08001222 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8s4__psimd,
Marat Dukhancd945c62019-10-25 11:59:50 -07001223 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__psimd,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001224 .mr = 6,
1225 .nr = 8,
Marat Dukhancd945c62019-10-25 11:59:50 -07001226 .log2_sr = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001227 };
1228 }
1229 xnn_params.f32.gemm2 = (struct gemm_parameters) {
Marat Dukhan29954272020-02-13 17:56:11 -08001230 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x2c4__psimd,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001231 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__psimd,
Marat Dukhan466b5232019-10-09 11:22:20 -07001232 .mr = 4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001233 .nr = 2,
1234 .log2_kr = 2,
Marat Dukhan466b5232019-10-09 11:22:20 -07001235 };
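    // Depthwise convolution kernels are keyed by the number of filter taps (mr = kh*kw: 4, 9, 25);
    // cr is the channel tile processed per iteration.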
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001236 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -08001237 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001238 .cr = 4,
1239 .mr = 4,
Marat Dukhan466b5232019-10-09 11:22:20 -07001240 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001241 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -08001242 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__psimd_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001243 .cr = 4,
1244 .mr = 9,
1245 };
1246 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
Marat Dukhan5098c3e2019-11-07 12:01:19 -08001247 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001248 .cr = 4,
1249 .mr = 25,
1250 };
1251 xnn_params.f32.avgpool = (struct avgpool_parameters) {
Marat Dukhan6ee435a2020-02-26 22:33:38 -08001252 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_9x__psimd_c4,
1253 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_9p8x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001254 .mr = 9,
1255 .qr = 8,
1256 };
1257 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
Marat Dukhan6ee435a2020-02-26 22:33:38 -08001258 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_9x__psimd_c4,
1259 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_9p8x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001260 .mr = 9,
1261 .qr = 8,
1262 };
1263 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
Marat Dukhana63a6fc2020-03-10 06:12:48 -07001264 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_7x__psimd_c4,
1265 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_7p7x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001266 .mr = 7,
1267 };
1268 xnn_params.f32.maxpool = (struct maxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -08001269 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001270 .mr = 9,
1271 .qr = 8,
1272 };
1273 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -08001274 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_4x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001275 .mr = 4,
1276 };
1277 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -08001278 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_9x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001279 .mr = 9,
1280 };
1281 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -08001282 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001283 .mr = 9,
1284 .qr = 8,
1285 };
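    // ibilinear = indirection-based bilinear interpolation, used by the resize-bilinear operator.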
Marat Dukhan660fd192020-03-10 04:55:30 -07001286 xnn_params.f32.ibilinear = (struct ibilinear_parameters) {
1287 .ukernel = (xnn_ibilinear_ukernel_function) xnn_f32_ibilinear_ukernel__psimd_c8,
Marat Dukhan69722492019-11-11 19:55:50 -08001288 .pixel_tile = 1,
1289 .channel_tile = 8,
1290 };
Marat Dukhan5c5fa962020-03-10 18:38:33 -07001291 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__psimd_x8;
Marat Dukhan662faa02019-12-09 22:48:16 -08001292 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__psimd_x8;
Marat Dukhan8d3c07e2020-01-02 01:20:59 -08001293 xnn_params.f32.sigmoid = (xnn_univector_ukernel_function) xnn_f32_sigmoid_ukernel__psimd_p5_div_x16;
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001294 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001295 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__psimd_2x8,
1296 .row_tile = 2,
1297 .channel_tile = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001298 };
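    // raddstoreexpminusmax computes and stores exp(x - max) while accumulating its sum; together
    // with rmax it implements the numerically stable softmax decomposition.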
Marat Dukhan1edc4542020-01-27 12:40:13 -08001299 xnn_params.f32.raddstoreexpminusmax = xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x16_acc2;
1300 xnn_params.f32.rmax = xnn_f32_rmax_ukernel__psimd;
Marat Dukhanb1a0fc32019-12-02 19:32:02 -08001301 xnn_params.f32.vadd = (struct vbinary_parameters) {
1302 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_ukernel__psimd_x8,
1303 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__psimd_x8,
1304 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__psimd_x8,
1305 .element_tile = 8,
1306 };
Marat Dukhan69180502019-12-06 15:00:31 -08001307 xnn_params.f32.vdiv = (struct vbinary_parameters) {
1308 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_ukernel__psimd_x4,
1309 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_ukernel__psimd_x4,
1310 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_ukernel__psimd_x4,
1311 .element_tile = 4,
1312 };
Marat Dukhan79e7f842019-12-05 14:35:50 -08001313 xnn_params.f32.vmax = (struct vbinary_parameters) {
1314 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__psimd_x8,
1315 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__psimd_x8,
1316 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__psimd_x8,
1317 .element_tile = 8,
1318 };
1319 xnn_params.f32.vmin = (struct vbinary_parameters) {
1320 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmin_ukernel__psimd_x8,
1321 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__psimd_x8,
1322 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__psimd_x8,
1323 .element_tile = 8,
1324 };
Marat Dukhan1e782c42019-11-21 17:02:40 -08001325 xnn_params.f32.vmul = (struct vbinary_parameters) {
1326 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_ukernel__psimd_x8,
1327 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__psimd_x8,
1328 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__psimd_x8,
Marat Dukhanca2733c2019-11-15 23:21:17 -08001329 .element_tile = 8,
1330 };
Marat Dukhan05f3f6d2019-12-03 15:13:53 -08001331 xnn_params.f32.vsub = (struct vbinary_parameters) {
1332 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_ukernel__psimd_x8,
1333 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_ukernel__psimd_x8,
1334 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_ukernel__psimd_x8,
1335 .element_tile = 8,
1336 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001337 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan49e6ee92019-11-06 15:55:29 -08001338 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__psimd_2x,
1339 .channel_tile = 4,
1340 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001341 };
1342 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001343
1344 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001345 #ifndef XNN_NO_X32_OPERATORS
1346 xnn_params.x32.pad = (struct pad_parameters) {
1347 .ukernel = xnn_x32_pad_x2__psimd,
1348 .mr = 2,
1349 };
1350 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
1351 xnn_params.x32.zip = (struct zip_parameters) {
1352 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__psimd,
1353 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__psimd,
1354 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__psimd,
1355 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__psimd,
1356 };
1357 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001358
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001359#elif XNN_ARCH_WASM || XNN_ARCH_ASMJS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001360 // Unlike most other architectures, on x86/x86-64 a floating-point instruction that produces
1361 // a NaN output from non-NaN arguments sets the sign bit of the generated NaN.
1362 // We use this to distinguish x86/x86-64 from other architectures: subtracting two infinities
1363 // must produce NaN per the IEEE 754 standard, and we then check the sign of the result below.
1364 static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
1365 const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;
1366
1367 /**************************** Q8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001368 #ifndef XNN_NO_Q8_OPERATORS
1369 xnn_params.q8.gemm = (struct gemm_parameters) {
1370 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
1371 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
1372 .mr = 2,
1373 .nr = 2,
1374 };
1375 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
1376 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
1377 .cr = 1,
1378 .mr = 9,
1379 };
1380 xnn_params.q8.avgpool = (struct avgpool_parameters) {
Marat Dukhan6ee435a2020-02-26 22:33:38 -08001381 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_9x__scalar_c1,
1382 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_9p8x__scalar_c1,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001383 .mr = 9,
1384 .qr = 8,
1385 };
1386 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
Marat Dukhana63a6fc2020-03-10 06:12:48 -07001387 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_7x__scalar_c1,
1388 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_7p7x__scalar_c1,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001389 .mr = 7,
1390 };
1391 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
1392 #endif // XNN_NO_Q8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001393
1394 /**************************** U8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001395 #ifndef XNN_NO_U8_OPERATORS
1396 xnn_params.u8.maxpool = (struct maxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -08001397 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8x__scalar_c1,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001398 .mr = 9,
1399 .qr = 8,
1400 };
Marat Dukhan5c5fa962020-03-10 18:38:33 -07001401 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar_x4;
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001402 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
1403 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
1404 #endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001405
1406 /**************************** X8 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001407 #ifndef XNN_NO_X8_OPERATORS
1408 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
1409 xnn_params.x8.zip = (struct zip_parameters) {
1410 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
1411 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
1412 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
1413 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
1414 };
1415 #endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001416
1417 /**************************** F32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001418 #ifndef XNN_NO_F32_OPERATORS
1419 if (is_wasm_x86) {
1420 xnn_params.f32.gemm = (struct gemm_parameters) {
1421 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_2x4__scalar,
1422 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_2x4__scalar,
Marat Dukhan436ebe62019-12-04 15:10:12 -08001423 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__wasm,
1424 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__wasm,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001425 .mr = 2,
1426 .nr = 4,
1427 };
1428 } else {
1429 xnn_params.f32.gemm = (struct gemm_parameters) {
Marat Dukhan436ebe62019-12-04 15:10:12 -08001430 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x4__wasm,
1431 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x4__wasm,
1432 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__wasm,
1433 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__wasm,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001434 .mr = 4,
1435 .nr = 4,
1436 };
1437 }
1438 xnn_params.f32.gemm2 = (struct gemm_parameters) {
Marat Dukhan29954272020-02-13 17:56:11 -08001439 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x2__wasm,
Marat Dukhan436ebe62019-12-04 15:10:12 -08001440 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__wasm,
XNNPACK Teamb455b122019-09-27 18:10:33 -07001441 .mr = 4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001442 .nr = 2,
XNNPACK Teamb455b122019-09-27 18:10:33 -07001443 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001444 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
Marat Dukhan436ebe62019-12-04 15:10:12 -08001445 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x4__wasm_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001446 .cr = 1,
1447 .mr = 4,
1448 };
1449 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
Marat Dukhan436ebe62019-12-04 15:10:12 -08001450 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x9__wasm_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001451 .cr = 1,
1452 .mr = 9,
1453 };
1454 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
Marat Dukhan436ebe62019-12-04 15:10:12 -08001455 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x25__wasm_acc2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001456 .cr = 1,
1457 .mr = 25,
1458 };
1459 xnn_params.f32.avgpool = (struct avgpool_parameters) {
Marat Dukhan6ee435a2020-02-26 22:33:38 -08001460 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_9x__wasm_c1,
1461 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_9p8x__wasm_c1,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001462 .mr = 9,
1463 .qr = 8,
1464 };
1465 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
Marat Dukhan6ee435a2020-02-26 22:33:38 -08001466 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_9x__wasm_c1,
1467 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_9p8x__wasm_c1,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001468 .mr = 9,
1469 .qr = 8,
1470 };
1471 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
Marat Dukhana63a6fc2020-03-10 06:12:48 -07001472 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_7x__wasm_c1,
1473 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_7p7x__wasm_c1,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001474 .mr = 7,
1475 };
1476 xnn_params.f32.maxpool = (struct maxpool_parameters) {
Marat Dukhan436ebe62019-12-04 15:10:12 -08001477 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8x__wasm_c1,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001478 .mr = 9,
1479 .qr = 8,
1480 };
1481 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -08001482 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_4x__scalar_c1,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001483 .mr = 4,
1484 };
1485 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -08001486 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_9x__scalar_c1,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001487 .mr = 9,
1488 };
1489 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
Marat Dukhan329da642019-11-19 21:44:39 -08001490 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001491 .mr = 9,
1492 .qr = 8,
1493 };
Marat Dukhan660fd192020-03-10 04:55:30 -07001494 xnn_params.f32.ibilinear = (struct ibilinear_parameters) {
1495 .ukernel = (xnn_ibilinear_ukernel_function) xnn_f32_ibilinear_ukernel__scalar_c2,
Marat Dukhan69722492019-11-11 19:55:50 -08001496 .pixel_tile = 1,
1497 .channel_tile = 2,
1498 };
Marat Dukhan5c5fa962020-03-10 18:38:33 -07001499 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__wasm_x4;
Marat Dukhan662faa02019-12-09 22:48:16 -08001500 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__wasm_x4;
Marat Dukhan3a77ea72019-12-23 12:10:24 -08001501 xnn_params.f32.sigmoid = (xnn_univector_ukernel_function) xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2;
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001502 xnn_params.f32.prelu = (struct prelu_parameters) {
Marat Dukhanc8230a42020-02-24 00:00:35 -08001503 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__scalar_2x4,
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001504 .row_tile = 4,
1505 .channel_tile = 4,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001506 };
Marat Dukhan1edc4542020-01-27 12:40:13 -08001507 xnn_params.f32.raddstoreexpminusmax = xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2;
1508 xnn_params.f32.rmax = xnn_f32_rmax_ukernel__scalar;
Marat Dukhanb1a0fc32019-12-02 19:32:02 -08001509 xnn_params.f32.vadd = (struct vbinary_parameters) {
Marat Dukhan436ebe62019-12-04 15:10:12 -08001510 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_ukernel__wasm_x4,
1511 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__wasm_x4,
1512 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__wasm_x4,
Marat Dukhanb1a0fc32019-12-02 19:32:02 -08001513 .element_tile = 8,
1514 };
Marat Dukhan69180502019-12-06 15:00:31 -08001515 xnn_params.f32.vdiv = (struct vbinary_parameters) {
1516 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_ukernel__wasm_x2,
1517 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_ukernel__wasm_x2,
1518 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_ukernel__wasm_x2,
1519 .element_tile = 2,
1520 };
Marat Dukhan79e7f842019-12-05 14:35:50 -08001521 xnn_params.f32.vmax = (struct vbinary_parameters) {
1522 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__wasm_x4,
1523 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__wasm_x4,
1524 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__wasm_x4,
1525 .element_tile = 8,
1526 };
1527 xnn_params.f32.vmin = (struct vbinary_parameters) {
1528 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmin_ukernel__wasm_x4,
1529 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__wasm_x4,
1530 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__wasm_x4,
1531 .element_tile = 8,
1532 };
Marat Dukhan1e782c42019-11-21 17:02:40 -08001533 xnn_params.f32.vmul = (struct vbinary_parameters) {
Marat Dukhan436ebe62019-12-04 15:10:12 -08001534 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_ukernel__wasm_x4,
1535 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__wasm_x4,
1536 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__wasm_x4,
Marat Dukhanca2733c2019-11-15 23:21:17 -08001537 .element_tile = 8,
1538 };
Marat Dukhan05f3f6d2019-12-03 15:13:53 -08001539 xnn_params.f32.vsub = (struct vbinary_parameters) {
Marat Dukhan436ebe62019-12-04 15:10:12 -08001540 .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_ukernel__wasm_x4,
1541 .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_ukernel__wasm_x4,
1542 .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_ukernel__wasm_x4,
Marat Dukhan05f3f6d2019-12-03 15:13:53 -08001543 .element_tile = 8,
1544 };
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001545 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
Marat Dukhan436ebe62019-12-04 15:10:12 -08001546 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c1__wasm_2x,
Marat Dukhan49e6ee92019-11-06 15:55:29 -08001547 .channel_tile = 1,
1548 .row_tile = 2,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001549 };
Marat Dukhanefc47b82019-11-18 09:25:38 -08001550 #ifndef XNN_NO_NCHW_OPERATORS
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001551 xnn_params.f32.spmm = (struct spmm_parameters) {
Marat Dukhanbff791e2019-10-24 11:05:37 -07001552 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x1__scalar,
1553 .mr = 8,
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001554 .nr = 1,
1555 };
Erich Elsenc6afd9b2019-10-24 16:10:53 -07001556 xnn_params.f32.spmm2 = (struct spmm_parameters) {
1557 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x2__scalar,
1558 .mr = 8,
1559 .nr = 2,
1560 };
1561 xnn_params.f32.spmm4 = (struct spmm_parameters) {
1562 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x4__scalar,
1563 .mr = 8,
1564 .nr = 4,
1565 };
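      // Direct 3x3 stride-2 convolution that reads HWC input and writes CHW output; with 3 input
      // channels it is typically the first layer when running a model through the NCHW path.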
Marat Dukhan14fe0b22019-10-23 21:20:07 -07001566 xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
1567 .ukernel_with_symm_padding =
1568 (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1,
1569 .output_channel_tile = 4,
1570 .output_height_tile = 1,
1571 .output_width_tile = 1,
1572 };
1573 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
1574 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar,
1575 .input_width_tile = 1,
1576 .output_width_tile = 1,
1577 .output_height_tile = 1,
1578 };
1579 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
1580 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar,
1581 .input_width_tile = 1,
1582 .output_width_tile = 1,
1583 .output_height_tile = 1,
1584 };
Marat Dukhana99918a2019-11-15 14:40:12 -08001585 xnn_params.f32.spchw_dwconv5x5 = (struct spchw_dwconv_parameters) {
1586 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar,
1587 .input_width_tile = 1,
1588 .output_width_tile = 1,
1589 .output_height_tile = 1,
1590 };
1591 xnn_params.f32.spchw_dwconv5x5s2 = (struct spchw_dwconv_parameters) {
1592 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar,
1593 .input_width_tile = 1,
1594 .output_width_tile = 1,
1595 .output_height_tile = 1,
1596 };
Marat Dukhan14fe0b22019-10-23 21:20:07 -07001597 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
1598 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__scalar_x1,
1599 .channel_tile = 1,
1600 };
Marat Dukhanefc47b82019-11-18 09:25:38 -08001601 #endif // XNN_NO_NCHW_OPERATORS
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001602 #endif // XNN_NO_F32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001603
1604 /**************************** X32 micro-kernels ****************************/
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001605 #ifndef XNN_NO_X32_OPERATORS
1606 xnn_params.x32.pad = (struct pad_parameters) {
1607 .ukernel = xnn_x32_pad_x2__scalar,
1608 .mr = 2,
1609 };
1610 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__scalar;
1611 xnn_params.x32.zip = (struct zip_parameters) {
1612 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__scalar,
1613 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__scalar,
1614 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__scalar,
1615 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__scalar,
1616 };
1617 #endif // XNN_NO_X32_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001618
1619#else
1620 #error "Unsupported architecture"
1621#endif
1622 xnn_params.initialized = true;
1623}
1624
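/*
 * Typical call sequence (a minimal sketch; operator creation and error handling are elided):
 *
 *   enum xnn_status status = xnn_initialize(NULL);  // NULL selects the default allocator
 *   if (status != xnn_status_success) {
 *     // xnn_status_unsupported_hardware: required ISA features are missing on this CPU
 *   }
 *   ...
 *   xnn_deinitialize();
 */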
Marat Dukhan04f03be2019-11-19 12:36:47 -08001625enum xnn_status xnn_initialize(const struct xnn_allocator* allocator) {
Marat Dukhand343c222019-10-07 09:22:14 -07001626 #ifndef __EMSCRIPTEN__
1627 if (!cpuinfo_initialize()) {
1628 return xnn_status_out_of_memory;
1629 }
1630 #endif
XNNPACK Teamb455b122019-09-27 18:10:33 -07001631 pthread_once(&init_guard, &init);
1632 if (xnn_params.initialized) {
Marat Dukhan04f03be2019-11-19 12:36:47 -08001633 if (allocator != NULL) {
1634 memcpy(&xnn_params.allocator, allocator, sizeof(struct xnn_allocator));
1635 } else {
1636 xnn_params.allocator.allocate = &xnn_allocate;
1637 xnn_params.allocator.reallocate = &xnn_reallocate;
1638 xnn_params.allocator.deallocate = &xnn_deallocate;
1639 xnn_params.allocator.aligned_allocate = &xnn_aligned_allocate;
1640 xnn_params.allocator.aligned_deallocate = &xnn_aligned_deallocate;
1641 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07001642 return xnn_status_success;
1643 } else {
1644 return xnn_status_unsupported_hardware;
1645 }
1646}
1647
1648enum xnn_status xnn_deinitialize(void) {
Marat Dukhand343c222019-10-07 09:22:14 -07001649 #ifndef __EMSCRIPTEN__
1650 cpuinfo_deinitialize();
1651 #endif
XNNPACK Teamb455b122019-09-27 18:10:33 -07001652 return xnn_status_success;
1653}