// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8
9#pragma once
10
11#include <stddef.h>
12#include <stdint.h>
13
14#include <pthreadpool.h>
15
Marat Dukhaneeaa7bd2019-10-25 17:31:25 -070016#include <xnnpack/params.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070017#include <xnnpack/compute.h>
18
19
// Tag describing which kind of ukernel a struct xnn_ukernel holds (see its
// `type` field).
//
// Enumerators are implicitly numbered from xnn_ukernel_type_none = 0, so a
// zero-filled struct xnn_ukernel reads as "none". Inserting an enumerator
// anywhere but the end renumbers every enumerator after it.
enum xnn_ukernel_type {
  xnn_ukernel_type_none = 0,
  xnn_ukernel_type_add,
  xnn_ukernel_type_argmax_pooling,
  xnn_ukernel_type_average_pooling,
  xnn_ukernel_type_binary_elementwise,
  xnn_ukernel_type_channel_shuffle,
  xnn_ukernel_type_conv2d_hwc2chw,
  xnn_ukernel_type_dwconv,
  xnn_ukernel_type_gemm,
  xnn_ukernel_type_global_average_pooling,
  xnn_ukernel_type_igemm,
  xnn_ukernel_type_lut,
  xnn_ukernel_type_max_pooling,
  xnn_ukernel_type_pad,
  xnn_ukernel_type_pixelwise_average_pooling,
  xnn_ukernel_type_prelu,
  xnn_ukernel_type_softmax,
  xnn_ukernel_type_spmm,
  xnn_ukernel_type_subconv2d,
  xnn_ukernel_type_unary_elementwise,
  xnn_ukernel_type_unpooling,
  xnn_ukernel_type_vmulcaddc,
};
44
// Tag identifying the concrete operator a struct xnn_operator represents (see
// its `type` field).
//
// Enumerators are implicitly numbered from xnn_operator_type_invalid = 0, so a
// zero-filled struct xnn_operator reads as "invalid". The declaration order is
// not strictly alphabetical (e.g. ceiling follows clamp); keep it as is, since
// reordering would change the numeric values.
// NOTE(review): the name suffixes presumably encode tensor layout and element
// type (e.g. `_nhwc_f32`, `_nc_qu8`) - confirm against the public API.
enum xnn_operator_type {
  xnn_operator_type_invalid = 0,
  xnn_operator_type_abs_nc_f32,
  xnn_operator_type_add_nd_f16,
  xnn_operator_type_add_nd_f32,
  xnn_operator_type_argmax_pooling_nhwc_f32,
  xnn_operator_type_average_pooling_nhwc_f32,
  xnn_operator_type_average_pooling_nhwc_qu8,
  xnn_operator_type_bankers_rounding_nc_f32,
  xnn_operator_type_channel_shuffle_nc_x32,
  xnn_operator_type_channel_shuffle_nc_x8,
  xnn_operator_type_clamp_nc_f32,
  xnn_operator_type_clamp_nc_u8,
  xnn_operator_type_ceiling_nc_f32,
  xnn_operator_type_constant_pad_nd_x32,
  xnn_operator_type_convolution_nchw_f32,
  xnn_operator_type_convolution_nhwc_f16,
  xnn_operator_type_convolution_nhwc_f32,
  xnn_operator_type_convolution_nhwc_qs8,
  xnn_operator_type_convolution_nhwc_qu8,
  xnn_operator_type_copy_nc_x32,
  xnn_operator_type_deconvolution_nhwc_f32,
  xnn_operator_type_deconvolution_nhwc_qu8,
  xnn_operator_type_divide_nd_f32,
  xnn_operator_type_fully_connected_nc_f32,
  xnn_operator_type_fully_connected_nc_qu8,
  xnn_operator_type_floor_nc_f32,
  xnn_operator_type_global_average_pooling_nwc_f16,
  xnn_operator_type_global_average_pooling_nwc_f32,
  xnn_operator_type_global_average_pooling_nwc_qu8,
  xnn_operator_type_global_average_pooling_ncw_f32,
  xnn_operator_type_hardswish_nc_f32,
  xnn_operator_type_leaky_relu_nc_f32,
  xnn_operator_type_leaky_relu_nc_qu8,
  xnn_operator_type_max_pooling_nhwc_f32,
  xnn_operator_type_max_pooling_nhwc_u8,
  xnn_operator_type_maximum_nd_f32,
  xnn_operator_type_minimum_nd_f32,
  xnn_operator_type_multiply_nd_f32,
  xnn_operator_type_negate_nc_f32,
  xnn_operator_type_prelu_nc_f32,
  xnn_operator_type_resize_bilinear_nhwc_f32,
  xnn_operator_type_sigmoid_nc_f32,
  xnn_operator_type_sigmoid_nc_qu8,
  xnn_operator_type_softmax_nc_f32,
  xnn_operator_type_softmax_nc_qu8,
  xnn_operator_type_square_nc_f32,
  xnn_operator_type_square_root_nc_f32,
  xnn_operator_type_squared_difference_nd_f32,
  xnn_operator_type_subtract_nd_f32,
  xnn_operator_type_truncation_nc_f32,
  xnn_operator_type_unpooling_nhwc_x32,
};
98
Marat Dukhan1f29b802020-05-15 23:46:39 -070099struct xnn_ukernel_conv2d {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700100 union {
Marat Dukhan1f29b802020-05-15 23:46:39 -0700101 xnn_conv_hwc2chw_ukernel_function hwc2chw_function;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700102 xnn_conv_hwc_ukernel_function hwc_function;
103 };
104 uint8_t output_height_tile;
105 uint8_t output_channel_tile;
106};
107
108struct xnn_ukernel_dwconv {
109 union {
Marat Dukhanaefaef32020-04-09 07:09:34 -0700110 xnn_dwconv_unipass_ukernel_function unipass_function;
111 xnn_dwconv_multipass_ukernel_function multipass_function;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700112 };
Marat Dukhanaefaef32020-04-09 07:09:34 -0700113 uint8_t primary_tile;
114 uint8_t incremental_tile;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700115};
116
117// Direct 2D Depthwise Convolution
118struct xnn_ukernel_dwconv2d {
119 union {
Marat Dukhan1f29b802020-05-15 23:46:39 -0700120 xnn_dwconv_chw_ukernel_function chw_function;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700121 };
122 uint8_t input_width_tile;
123 uint8_t output_width_tile;
124};
125
126struct xnn_ukernel_gemm {
Marat Dukhan05702cf2020-03-26 15:41:33 -0700127 struct xnn_hmp_gemm_ukernel general_case;
128 struct xnn_hmp_gemm_ukernel mr1_case;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700129 uint8_t mr;
130 uint8_t nr;
131 uint8_t kr;
132};
133
134struct xnn_ukernel_igemm {
Marat Dukhan05702cf2020-03-26 15:41:33 -0700135 struct xnn_hmp_igemm_ukernel general_case;
136 struct xnn_hmp_igemm_ukernel mr1_case;
137 struct xnn_hmp_gemm_ukernel gemm_case;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700138 uint8_t mr;
139 uint8_t nr;
140 uint8_t kr;
141};
142
143struct xnn_ukernel_spmm {
144 xnn_spmm_ukernel_function function;
145 uint8_t mr;
146};
147
148struct xnn_ukernel_vmulcaddc {
149 xnn_vmulcaddc_ukernel_function function;
150 uint8_t mr;
151};
152
Frank Barchardc67dd7f2020-07-06 11:23:57 -0700153struct xnn_ukernel_vbinary {
Frank Barchard65beb1a2020-07-20 16:40:02 -0700154 xnn_vbinary_ukernel_function op_function;
155 xnn_vbinary_ukernel_function opc_function;
156 xnn_vbinary_ukernel_function ropc_function;
Frank Barchardc67dd7f2020-07-06 11:23:57 -0700157};
158
Frank Barchard62c5e232020-07-21 17:42:19 -0700159struct xnn_ukernel_vunary {
160 xnn_vunary_ukernel_function function;
161};
162
XNNPACK Teamb455b122019-09-27 18:10:33 -0700163struct xnn_ukernel {
164 enum xnn_ukernel_type type;
165 union {
Marat Dukhan1f29b802020-05-15 23:46:39 -0700166 struct xnn_ukernel_conv2d conv2d;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700167 struct xnn_ukernel_dwconv dwconv;
168 struct xnn_ukernel_dwconv2d dwconv2d;
169 struct xnn_ukernel_gemm gemm;
170 struct xnn_ukernel_igemm igemm;
171 struct xnn_ukernel_spmm spmm;
172 struct xnn_ukernel_vmulcaddc vmulcaddc;
Frank Barchardc67dd7f2020-07-06 11:23:57 -0700173 struct xnn_ukernel_vbinary vbinary;
Frank Barchard62c5e232020-07-21 17:42:19 -0700174 struct xnn_ukernel_vunary vunary;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700175 };
176};
177
// Run state recorded in struct xnn_operator (its `state` field).
//
// xnn_run_state_invalid is 0, so a zero-filled operator starts out invalid.
// NOTE(review): `ready` / `skip` presumably mean "set up and runnable" and
// "set up but nothing to compute" - confirm against the run/setup code.
enum xnn_run_state {
  xnn_run_state_invalid = 0,
  xnn_run_state_ready,
  xnn_run_state_skip,
};
183
// Per-subconvolution parameters; struct xnn_operator points at these through
// its `subconvolution_buffer` field.
// NOTE(review): units of the stride fields (bytes vs. elements) and ownership
// of the pointed-to buffers are not visible here - confirm with the code that
// fills this struct in.
struct subconvolution_params {
  void* weights;
  size_t w_stride;
  const void** indirection_buffer;
  void* output;
  size_t slice_width;
  size_t slice_height;
  size_t indirection_y_stride;
  size_t indirection_x_stride;
  // scaled_kernel_size := kernel_size * mr * sizeof(void*).
  size_t scaled_kernel_size;
};
196
197struct xnn_operator {
198 size_t batch_size;
199 uint32_t padding_top;
200 uint32_t padding_right;
201 uint32_t padding_bottom;
202 uint32_t padding_left;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700203 uint32_t kernel_height;
204 uint32_t kernel_width;
205 uint32_t stride_height;
206 uint32_t stride_width;
207 uint32_t dilation_height;
208 uint32_t dilation_width;
209 uint32_t groups;
210 size_t group_channels;
211 size_t group_input_channels;
212 size_t group_output_channels;
213 size_t channels;
214
215 size_t pad_before_channels;
216 size_t pad_after_channels;
217 uint32_t pad_value;
218
219 size_t input_height;
220 size_t input_width;
221 size_t input_pixel_stride;
222 const void* input;
223 const void** indirection_buffer;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700224
225 size_t input2_pixel_stride;
226 const void* input2;
227
228 size_t output_height;
229 size_t output_width;
230 size_t output_pixel_stride;
231 void* output;
232
233 void* packed_weights;
234 // Total number of non-zero kernel elements when weights use sparse representation.
235 size_t num_nonzero_values;
236 // Total number of non-zero kernel blocks when weights use sparse representation.
237 size_t num_nonzero_blocks;
238 // Total number of output channel blocks when weights use sparse representation.
239 size_t num_output_channel_blocks;
240 // Input channel corresponding to the first non-zero kernel element.
241 size_t first_input_channel;
242
243 float input_scale;
244 float output_scale;
Marat Dukhan54e95a02020-08-06 23:55:13 -0700245 int32_t input_zero_point;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700246 uint8_t output_zero_point;
247 uint8_t output_min;
248 uint8_t output_max;
249
250 size_t valid_batch_size;
251 size_t last_input_height;
252 size_t last_input_width;
253 const void* last_input;
Marat Dukhan69722492019-11-11 19:55:50 -0800254 size_t last_output_height;
255 size_t last_output_width;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700256 void* last_output;
257
258 void* zero_buffer;
259 void* lookup_table;
260 void* pixelwise_buffer;
261 struct subconvolution_params* subconvolution_buffer;
Marat Dukhan8440fde2019-10-24 12:46:13 -0700262 uint32_t flags;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700263
264 union {
Marat Dukhan5020b962020-06-08 13:30:10 -0700265 union xnn_f32_abs_params f32_abs;
Marat Dukhan28813332020-06-10 18:05:38 -0700266 union xnn_f32_lrelu_params f32_lrelu;
Marat Dukhan5020b962020-06-08 13:30:10 -0700267 union xnn_f32_neg_params f32_neg;
Marat Dukhan64e52512020-06-09 13:41:16 -0700268 union xnn_f32_rnd_params f32_rnd;
Marat Dukhan5868d802020-03-19 17:18:45 -0700269 // Parameters for Global Average Pooling in CHW layout
Marat Dukhanc3065f52020-06-04 13:33:32 -0700270 union xnn_f32_gavgpool_params f32_gavgpool;
271 union xnn_f32_hswish_params f32_hswish;
Frank Barchard01898c02020-06-23 21:49:50 -0700272 struct {
273 struct xnn_f16_minmax_params f16_minmax;
274 struct xnn_f16_scaleminmax_params f16_scaleminmax;
275 };
Marat Dukhan8452ff52020-04-08 20:44:58 -0700276 // Pixelwise Average Pooling normally use f32_minmax_params, but also initialize
277 // f32_scaleminmax_params in case it needs to switch to Global Average Pooling operation.
Marat Dukhan5868d802020-03-19 17:18:45 -0700278 struct {
Marat Dukhanc3065f52020-06-04 13:33:32 -0700279 union xnn_f32_minmax_params f32_minmax;
280 union xnn_f32_scaleminmax_params f32_scaleminmax;
Marat Dukhan5868d802020-03-19 17:18:45 -0700281 };
Marat Dukhanc3065f52020-06-04 13:33:32 -0700282 union xnn_f32_chw_params f32_chw;
Marat Dukhan16f1e1a2020-08-04 16:38:22 -0700283 union xnn_qs8_gemm_params qs8_gemm;
Marat Dukhan08b7a972020-07-14 18:17:29 -0700284 union xnn_qu8_add_params qu8_add;
285 union xnn_qu8_gemm_params qu8_gemm;
286 // Average Pooling normally use qu8_avgpool_params, but also initialize qu8_gavgpool_params in case it needs to switch
Marat Dukhan5868d802020-03-19 17:18:45 -0700287 // to Global Average Pooling operation.
288 struct {
Marat Dukhan08b7a972020-07-14 18:17:29 -0700289 union xnn_qu8_avgpool_params qu8_avgpool;
290 union xnn_qu8_avgpool_params qu8_gavgpool;
Marat Dukhan5868d802020-03-19 17:18:45 -0700291 };
Marat Dukhanc3065f52020-06-04 13:33:32 -0700292 union xnn_u8_minmax_params u8_minmax;
293 } params;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700294 enum xnn_operator_type type;
295 struct xnn_ukernel ukernel;
296
297 struct compute_parameters compute;
298 struct compute_parameters compute2;
299 union {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700300 struct argmax_pooling_context argmax_pooling;
301 struct average_pooling_context average_pooling;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700302 struct channel_shuffle_context channel_shuffle;
Marat Dukhan1f29b802020-05-15 23:46:39 -0700303 struct conv2d_context conv2d;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700304 struct dwconv2d_context dwconv2d;
305 struct dwconv_context dwconv;
Marat Dukhanca2733c2019-11-15 23:21:17 -0800306 struct elementwise_binary_context elementwise_binary;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700307 struct gemm_context gemm;
Marat Dukhanefc47b82019-11-18 09:25:38 -0800308 struct global_average_pooling_nwc_context global_average_pooling_nwc;
309 struct global_average_pooling_ncw_context global_average_pooling_ncw;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700310 struct igemm_context igemm;
311 struct lut_contiguous_context lut_contiguous;
312 struct lut_strided_context lut_strided;
313 struct max_pooling_context max_pooling;
Marat Dukhan4662b192020-05-21 15:52:03 -0700314 struct pad_context pad;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700315 struct pixelwise_average_pooling_context pixelwise_average_pooling;
316 struct prelu_context prelu;
Marat Dukhan69722492019-11-11 19:55:50 -0800317 struct resize_bilinear_context resize_bilinear;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700318 struct spmm_context spmm;
319 struct subconv_context subconv;
Marat Dukhan29954272020-02-13 17:56:11 -0800320 struct subgemm_context subgemm;
Marat Dukhanfd8e6892020-01-27 15:25:25 -0800321 struct f32_three_pass_softmax_context f32_three_pass_softmax;
322 struct u8_softmax_context u8_softmax;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700323 struct univector_contiguous_context univector_contiguous;
324 struct univector_strided_context univector_strided;
325 struct unpooling_context unpooling;
326 struct vmulcaddc_context vmulcaddc;
327 } context;
328
329 enum xnn_run_state state;
Frank Barchard62c5e232020-07-21 17:42:19 -0700330};