// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
| 8 | |
| 9 | #pragma once |
| 10 | |
| 11 | #include <stddef.h> |
| 12 | #include <stdint.h> |
| 13 | |
| 14 | #include <pthreadpool.h> |
| 15 | |
Marat Dukhan | eeaa7bd | 2019-10-25 17:31:25 -0700 | [diff] [blame] | 16 | #include <xnnpack/params.h> |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 17 | #include <xnnpack/compute.h> |
| 18 | |
| 19 | |
// Kind of micro-kernel attached to an operator; this is the type of the
// `type` field of struct xnn_ukernel.
//
// xnn_ukernel_type_default is pinned to 0, so a zero-initialized
// struct xnn_ukernel carries the default type. The remaining enumerators take
// consecutive implicit values in declaration order.
enum xnn_ukernel_type {
  xnn_ukernel_type_default = 0,
  xnn_ukernel_type_average_pooling,
  xnn_ukernel_type_conv2d_hwc2chw,
  xnn_ukernel_type_dwconv,
  xnn_ukernel_type_gemm,
  xnn_ukernel_type_igemm,
  xnn_ukernel_type_pixelwise_average_pooling,
  xnn_ukernel_type_spmm,
  xnn_ukernel_type_subconv2d,
  xnn_ukernel_type_vmulcaddc,
};
| 32 | |
// Identifies the concrete operator stored in a struct xnn_operator (see its
// `type` field).
//
// xnn_operator_type_invalid is pinned to 0, so a zero-initialized operator is
// reported as invalid. All other enumerators take consecutive implicit values
// in declaration order; inserting or reordering entries changes those values.
//
// NOTE(review): enumerator names appear to follow
// <operation>_<layout>_<datatype> (e.g. nhwc/nchw/nc/nd and f16/f32/qs8/qu8);
// confirm against the operator implementations.
enum xnn_operator_type {
  xnn_operator_type_invalid = 0,
  xnn_operator_type_abs_nc_f32,
  xnn_operator_type_add_nd_f16,
  xnn_operator_type_add_nd_f32,
  xnn_operator_type_add_nd_qs8,
  xnn_operator_type_add_nd_qu8,
  xnn_operator_type_argmax_pooling_nhwc_f32,
  xnn_operator_type_average_pooling_nhwc_f32,
  xnn_operator_type_average_pooling_nhwc_qu8,
  xnn_operator_type_bankers_rounding_nc_f32,
  xnn_operator_type_channel_shuffle_nc_x8,
  xnn_operator_type_channel_shuffle_nc_x32,
  xnn_operator_type_clamp_nc_f32,
  xnn_operator_type_clamp_nc_s8,
  xnn_operator_type_clamp_nc_u8,
  xnn_operator_type_ceiling_nc_f32,
  xnn_operator_type_constant_pad_nd_x8,
  xnn_operator_type_constant_pad_nd_x32,
  xnn_operator_type_convert_nc_f16_f32,
  xnn_operator_type_convert_nc_f32_f16,
  xnn_operator_type_convert_nc_f32_qs8,
  xnn_operator_type_convert_nc_f32_qu8,
  xnn_operator_type_convert_nc_qs8_f32,
  xnn_operator_type_convert_nc_qu8_f32,
  xnn_operator_type_convolution_nchw_f32,
  xnn_operator_type_convolution_nhwc_f16,
  xnn_operator_type_convolution_nhwc_f32,
  xnn_operator_type_convolution_nhwc_qc8,
  xnn_operator_type_convolution_nhwc_qs8,
  xnn_operator_type_convolution_nhwc_qu8,
  xnn_operator_type_copy_nc_x32,
  xnn_operator_type_deconvolution_nhwc_f32,
  xnn_operator_type_deconvolution_nhwc_qs8,
  xnn_operator_type_deconvolution_nhwc_qu8,
  xnn_operator_type_depth_to_space_nchw2nhwc_x32,
  xnn_operator_type_depth_to_space_nhwc_x32,
  xnn_operator_type_divide_nd_f32,
  xnn_operator_type_elu_nc_f32,
  xnn_operator_type_elu_nc_qs8,
  xnn_operator_type_fully_connected_nc_f16,
  xnn_operator_type_fully_connected_nc_f32,
  xnn_operator_type_fully_connected_nc_qs8,
  xnn_operator_type_fully_connected_nc_qu8,
  xnn_operator_type_floor_nc_f32,
  xnn_operator_type_global_average_pooling_nwc_f16,
  xnn_operator_type_global_average_pooling_nwc_f32,
  xnn_operator_type_global_average_pooling_nwc_qs8,
  xnn_operator_type_global_average_pooling_nwc_qu8,
  xnn_operator_type_global_average_pooling_ncw_f32,
  xnn_operator_type_hardswish_nc_f16,
  xnn_operator_type_hardswish_nc_f32,
  xnn_operator_type_leaky_relu_nc_f32,
  xnn_operator_type_leaky_relu_nc_qu8,
  xnn_operator_type_max_pooling_nhwc_f32,
  xnn_operator_type_max_pooling_nhwc_s8,
  xnn_operator_type_max_pooling_nhwc_u8,
  xnn_operator_type_maximum_nd_f32,
  xnn_operator_type_minimum_nd_f32,
  xnn_operator_type_multiply_nd_f16,
  xnn_operator_type_multiply_nd_f32,
  xnn_operator_type_multiply_nd_qs8,
  xnn_operator_type_multiply_nd_qu8,
  xnn_operator_type_negate_nc_f32,
  xnn_operator_type_prelu_nc_f32,
  xnn_operator_type_resize_bilinear_nchw_f32,
  xnn_operator_type_resize_bilinear_nhwc_f32,
  xnn_operator_type_resize_bilinear_nhwc_s8,
  xnn_operator_type_resize_bilinear_nhwc_u8,
  xnn_operator_type_sigmoid_nc_f32,
  xnn_operator_type_sigmoid_nc_qs8,
  xnn_operator_type_sigmoid_nc_qu8,
  xnn_operator_type_softmax_nc_f32,
  xnn_operator_type_softmax_nc_qu8,
  xnn_operator_type_square_nc_f32,
  xnn_operator_type_square_root_nc_f32,
  xnn_operator_type_squared_difference_nd_f32,
  xnn_operator_type_subtract_nd_f32,
  xnn_operator_type_subtract_nd_qs8,
  xnn_operator_type_subtract_nd_qu8,
  xnn_operator_type_tanh_nc_qs8,
  xnn_operator_type_tanh_nc_qu8,
  xnn_operator_type_truncation_nc_f32,
  xnn_operator_type_unpooling_nhwc_x32,
};
| 118 | |
Marat Dukhan | 1f29b80 | 2020-05-15 23:46:39 -0700 | [diff] [blame] | 119 | struct xnn_ukernel_conv2d { |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 120 | union { |
Marat Dukhan | 1f29b80 | 2020-05-15 23:46:39 -0700 | [diff] [blame] | 121 | xnn_conv_hwc2chw_ukernel_function hwc2chw_function; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 122 | xnn_conv_hwc_ukernel_function hwc_function; |
| 123 | }; |
| 124 | uint8_t output_height_tile; |
| 125 | uint8_t output_channel_tile; |
| 126 | }; |
| 127 | |
| 128 | struct xnn_ukernel_dwconv { |
| 129 | union { |
Marat Dukhan | aefaef3 | 2020-04-09 07:09:34 -0700 | [diff] [blame] | 130 | xnn_dwconv_unipass_ukernel_function unipass_function; |
| 131 | xnn_dwconv_multipass_ukernel_function multipass_function; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 132 | }; |
Marat Dukhan | aefaef3 | 2020-04-09 07:09:34 -0700 | [diff] [blame] | 133 | uint8_t primary_tile; |
| 134 | uint8_t incremental_tile; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 135 | }; |
| 136 | |
| 137 | // Direct 2D Depthwise Convolution |
| 138 | struct xnn_ukernel_dwconv2d { |
| 139 | union { |
Marat Dukhan | bf715f9 | 2020-10-23 20:17:00 -0700 | [diff] [blame] | 140 | xnn_dwconv2d_chw_ukernel_function chw_function; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 141 | }; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 142 | uint8_t output_width_tile; |
| 143 | }; |
| 144 | |
| 145 | struct xnn_ukernel_gemm { |
Marat Dukhan | 05702cf | 2020-03-26 15:41:33 -0700 | [diff] [blame] | 146 | struct xnn_hmp_gemm_ukernel general_case; |
| 147 | struct xnn_hmp_gemm_ukernel mr1_case; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 148 | uint8_t mr; |
| 149 | uint8_t nr; |
| 150 | uint8_t kr; |
| 151 | }; |
| 152 | |
| 153 | struct xnn_ukernel_igemm { |
Marat Dukhan | 05702cf | 2020-03-26 15:41:33 -0700 | [diff] [blame] | 154 | struct xnn_hmp_igemm_ukernel general_case; |
| 155 | struct xnn_hmp_igemm_ukernel mr1_case; |
| 156 | struct xnn_hmp_gemm_ukernel gemm_case; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 157 | uint8_t mr; |
| 158 | uint8_t nr; |
| 159 | uint8_t kr; |
| 160 | }; |
| 161 | |
| 162 | struct xnn_ukernel_spmm { |
| 163 | xnn_spmm_ukernel_function function; |
| 164 | uint8_t mr; |
| 165 | }; |
| 166 | |
| 167 | struct xnn_ukernel_vmulcaddc { |
| 168 | xnn_vmulcaddc_ukernel_function function; |
| 169 | uint8_t mr; |
| 170 | }; |
| 171 | |
Frank Barchard | c67dd7f | 2020-07-06 11:23:57 -0700 | [diff] [blame] | 172 | struct xnn_ukernel_vbinary { |
Frank Barchard | 65beb1a | 2020-07-20 16:40:02 -0700 | [diff] [blame] | 173 | xnn_vbinary_ukernel_function op_function; |
| 174 | xnn_vbinary_ukernel_function opc_function; |
| 175 | xnn_vbinary_ukernel_function ropc_function; |
Frank Barchard | c67dd7f | 2020-07-06 11:23:57 -0700 | [diff] [blame] | 176 | }; |
| 177 | |
Frank Barchard | 62c5e23 | 2020-07-21 17:42:19 -0700 | [diff] [blame] | 178 | struct xnn_ukernel_vunary { |
| 179 | xnn_vunary_ukernel_function function; |
| 180 | }; |
| 181 | |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 182 | struct xnn_ukernel { |
| 183 | enum xnn_ukernel_type type; |
| 184 | union { |
Marat Dukhan | 1f29b80 | 2020-05-15 23:46:39 -0700 | [diff] [blame] | 185 | struct xnn_ukernel_conv2d conv2d; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 186 | struct xnn_ukernel_dwconv dwconv; |
| 187 | struct xnn_ukernel_dwconv2d dwconv2d; |
| 188 | struct xnn_ukernel_gemm gemm; |
| 189 | struct xnn_ukernel_igemm igemm; |
| 190 | struct xnn_ukernel_spmm spmm; |
| 191 | struct xnn_ukernel_vmulcaddc vmulcaddc; |
Frank Barchard | c67dd7f | 2020-07-06 11:23:57 -0700 | [diff] [blame] | 192 | struct xnn_ukernel_vbinary vbinary; |
Frank Barchard | 62c5e23 | 2020-07-21 17:42:19 -0700 | [diff] [blame] | 193 | struct xnn_ukernel_vunary vunary; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 194 | }; |
| 195 | }; |
| 196 | |
// Run state recorded in the `state` field of struct xnn_operator.
//
// xnn_run_state_invalid is pinned to 0, so a zero-initialized operator starts
// out in the invalid state; `ready` and `skip` follow as 1 and 2.
// NOTE(review): presumably `ready` means the operator was set up and can run,
// and `skip` means running it is a no-op; confirm in the setup/run code.
enum xnn_run_state {
  xnn_run_state_invalid = 0,
  xnn_run_state_ready,
  xnn_run_state_skip,
};
| 202 | |
// Parameters of one subconvolution; struct xnn_operator refers to these
// through its `subconvolution_buffer` pointer.
//
// NOTE(review): units of the stride fields (bytes vs. elements) are not
// visible in this file; confirm where the fields are filled in.
struct subconvolution_params {
  void* weights;
  size_t w_stride;
  const void** indirection_buffer;
  void* output;
  size_t slice_width;
  size_t slice_height;
  size_t indirection_y_stride;
  size_t indirection_x_stride;
  // scaled_kernel_size := kernel_size * mr * sizeof(void*).
  size_t scaled_kernel_size;
};
| 215 | |
| 216 | struct xnn_operator { |
| 217 | size_t batch_size; |
| 218 | uint32_t padding_top; |
| 219 | uint32_t padding_right; |
| 220 | uint32_t padding_bottom; |
| 221 | uint32_t padding_left; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 222 | uint32_t kernel_height; |
| 223 | uint32_t kernel_width; |
| 224 | uint32_t stride_height; |
| 225 | uint32_t stride_width; |
| 226 | uint32_t dilation_height; |
| 227 | uint32_t dilation_width; |
| 228 | uint32_t groups; |
| 229 | size_t group_channels; |
| 230 | size_t group_input_channels; |
| 231 | size_t group_output_channels; |
| 232 | size_t channels; |
| 233 | |
| 234 | size_t pad_before_channels; |
| 235 | size_t pad_after_channels; |
| 236 | uint32_t pad_value; |
| 237 | |
| 238 | size_t input_height; |
| 239 | size_t input_width; |
| 240 | size_t input_pixel_stride; |
| 241 | const void* input; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 242 | const void* input2; |
Marat Dukhan | 322b21e | 2020-11-24 21:30:38 -0800 | [diff] [blame] | 243 | const void** indirection_buffer; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 244 | |
| 245 | size_t output_height; |
| 246 | size_t output_width; |
| 247 | size_t output_pixel_stride; |
| 248 | void* output; |
| 249 | |
| 250 | void* packed_weights; |
| 251 | // Total number of non-zero kernel elements when weights use sparse representation. |
| 252 | size_t num_nonzero_values; |
| 253 | // Total number of non-zero kernel blocks when weights use sparse representation. |
| 254 | size_t num_nonzero_blocks; |
| 255 | // Total number of output channel blocks when weights use sparse representation. |
| 256 | size_t num_output_channel_blocks; |
| 257 | // Input channel corresponding to the first non-zero kernel element. |
| 258 | size_t first_input_channel; |
| 259 | |
| 260 | float input_scale; |
| 261 | float output_scale; |
Marat Dukhan | 54e95a0 | 2020-08-06 23:55:13 -0700 | [diff] [blame] | 262 | int32_t input_zero_point; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 263 | uint8_t output_zero_point; |
| 264 | uint8_t output_min; |
| 265 | uint8_t output_max; |
| 266 | |
| 267 | size_t valid_batch_size; |
| 268 | size_t last_input_height; |
| 269 | size_t last_input_width; |
| 270 | const void* last_input; |
Marat Dukhan | 6972249 | 2019-11-11 19:55:50 -0800 | [diff] [blame] | 271 | size_t last_output_height; |
| 272 | size_t last_output_width; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 273 | void* last_output; |
| 274 | |
Artsiom Ablavatski | bbe8506 | 2020-11-05 14:07:37 -0800 | [diff] [blame] | 275 | uint32_t block_size; |
| 276 | |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 277 | void* zero_buffer; |
| 278 | void* lookup_table; |
| 279 | void* pixelwise_buffer; |
| 280 | struct subconvolution_params* subconvolution_buffer; |
Marat Dukhan | 8440fde | 2019-10-24 12:46:13 -0700 | [diff] [blame] | 281 | uint32_t flags; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 282 | |
| 283 | union { |
Marat Dukhan | 134f984 | 2021-12-29 19:57:31 -0800 | [diff] [blame] | 284 | union xnn_f16_f32_cvt_params f16_f32_cvt; |
| 285 | struct xnn_f16_hswish_params f16_hswish; |
Marat Dukhan | 5020b96 | 2020-06-08 13:30:10 -0700 | [diff] [blame] | 286 | union xnn_f32_abs_params f32_abs; |
Marat Dukhan | b6bd4bc | 2020-12-01 17:01:40 -0800 | [diff] [blame] | 287 | union xnn_f32_elu_params f32_elu; |
Marat Dukhan | 2881333 | 2020-06-10 18:05:38 -0700 | [diff] [blame] | 288 | union xnn_f32_lrelu_params f32_lrelu; |
Marat Dukhan | 5020b96 | 2020-06-08 13:30:10 -0700 | [diff] [blame] | 289 | union xnn_f32_neg_params f32_neg; |
Marat Dukhan | 64e5251 | 2020-06-09 13:41:16 -0700 | [diff] [blame] | 290 | union xnn_f32_rnd_params f32_rnd; |
Marat Dukhan | 5868d80 | 2020-03-19 17:18:45 -0700 | [diff] [blame] | 291 | // Parameters for Global Average Pooling in CHW layout |
Marat Dukhan | c3065f5 | 2020-06-04 13:33:32 -0700 | [diff] [blame] | 292 | union xnn_f32_gavgpool_params f32_gavgpool; |
| 293 | union xnn_f32_hswish_params f32_hswish; |
Marat Dukhan | 104ae5e | 2021-05-24 13:41:57 -0700 | [diff] [blame] | 294 | struct xnn_f16_minmax_params f16_minmax; |
| 295 | struct xnn_f16_scaleminmax_params f16_scaleminmax; |
Marat Dukhan | 8452ff5 | 2020-04-08 20:44:58 -0700 | [diff] [blame] | 296 | // Pixelwise Average Pooling normally use f32_minmax_params, but also initialize |
| 297 | // f32_scaleminmax_params in case it needs to switch to Global Average Pooling operation. |
Marat Dukhan | 5868d80 | 2020-03-19 17:18:45 -0700 | [diff] [blame] | 298 | struct { |
Marat Dukhan | c3065f5 | 2020-06-04 13:33:32 -0700 | [diff] [blame] | 299 | union xnn_f32_minmax_params f32_minmax; |
| 300 | union xnn_f32_scaleminmax_params f32_scaleminmax; |
Marat Dukhan | 5868d80 | 2020-03-19 17:18:45 -0700 | [diff] [blame] | 301 | }; |
Marat Dukhan | c3065f5 | 2020-06-04 13:33:32 -0700 | [diff] [blame] | 302 | union xnn_f32_chw_params f32_chw; |
Marat Dukhan | b7c1b71 | 2021-12-30 07:23:57 -0800 | [diff] [blame] | 303 | union xnn_f32_f16_cvt_params f32_f16_cvt; |
Marat Dukhan | ed2d776 | 2021-12-03 23:51:19 -0800 | [diff] [blame] | 304 | union xnn_f32_qs8_cvt_params f32_qs8_cvt; |
| 305 | union xnn_f32_qu8_cvt_params f32_qu8_cvt; |
Marat Dukhan | f92206b | 2021-12-10 17:02:07 -0800 | [diff] [blame] | 306 | union xnn_qs8_f32_cvt_params qs8_f32_cvt; |
| 307 | union xnn_qu8_f32_cvt_params qu8_f32_cvt; |
Marat Dukhan | e3d17bf | 2021-05-24 22:22:43 -0700 | [diff] [blame] | 308 | union xnn_qs8_conv_minmax_params qs8_conv_minmax; |
Marat Dukhan | 9e0b539 | 2020-08-07 02:29:34 -0700 | [diff] [blame] | 309 | // Average Pooling normally use qs8_avgpool_params, but also initialize qs8_gavgpool_params in case it needs to switch |
| 310 | // to Global Average Pooling operation. |
| 311 | struct { |
| 312 | union xnn_qs8_avgpool_params qs8_avgpool; |
| 313 | union xnn_qs8_avgpool_params qs8_gavgpool; |
| 314 | }; |
Marat Dukhan | ff20948 | 2020-09-03 14:26:53 -0700 | [diff] [blame] | 315 | // Quantized Add parameters are sensitive to order of inputs, so we initialize an extra copy with the reversed order. |
| 316 | struct { |
Marat Dukhan | 6428725 | 2021-09-07 16:20:03 -0700 | [diff] [blame] | 317 | union xnn_qs8_addsub_minmax_params qs8_addsub; |
| 318 | union xnn_qs8_addsub_minmax_params qs8_raddsub; |
Marat Dukhan | ff20948 | 2020-09-03 14:26:53 -0700 | [diff] [blame] | 319 | }; |
Marat Dukhan | db007cd | 2021-07-20 23:42:39 -0700 | [diff] [blame] | 320 | struct { |
Marat Dukhan | 0853b8a | 2021-08-03 01:01:53 -0700 | [diff] [blame] | 321 | union xnn_qs8_mul_minmax_params qs8_mul; |
| 322 | union xnn_qs8_mul_minmax_params qs8_rmul; |
| 323 | }; |
| 324 | struct { |
Marat Dukhan | 6428725 | 2021-09-07 16:20:03 -0700 | [diff] [blame] | 325 | union xnn_qu8_addsub_minmax_params qu8_addsub; |
| 326 | union xnn_qu8_addsub_minmax_params qu8_raddsub; |
Marat Dukhan | db007cd | 2021-07-20 23:42:39 -0700 | [diff] [blame] | 327 | }; |
Marat Dukhan | 0853b8a | 2021-08-03 01:01:53 -0700 | [diff] [blame] | 328 | struct { |
| 329 | union xnn_qu8_mul_minmax_params qu8_mul; |
| 330 | union xnn_qu8_mul_minmax_params qu8_rmul; |
| 331 | }; |
Marat Dukhan | e3d17bf | 2021-05-24 22:22:43 -0700 | [diff] [blame] | 332 | union xnn_qu8_conv_minmax_params qu8_conv_minmax; |
Marat Dukhan | 08b7a97 | 2020-07-14 18:17:29 -0700 | [diff] [blame] | 333 | // Average Pooling normally use qu8_avgpool_params, but also initialize qu8_gavgpool_params in case it needs to switch |
Marat Dukhan | 5868d80 | 2020-03-19 17:18:45 -0700 | [diff] [blame] | 334 | // to Global Average Pooling operation. |
| 335 | struct { |
Marat Dukhan | 08b7a97 | 2020-07-14 18:17:29 -0700 | [diff] [blame] | 336 | union xnn_qu8_avgpool_params qu8_avgpool; |
| 337 | union xnn_qu8_avgpool_params qu8_gavgpool; |
Marat Dukhan | 5868d80 | 2020-03-19 17:18:45 -0700 | [diff] [blame] | 338 | }; |
Marat Dukhan | dc5c148 | 2021-08-16 09:03:15 -0700 | [diff] [blame] | 339 | union xnn_s8_minmax_params s8_minmax; |
Marat Dukhan | c3065f5 | 2020-06-04 13:33:32 -0700 | [diff] [blame] | 340 | union xnn_u8_minmax_params u8_minmax; |
| 341 | } params; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 342 | enum xnn_operator_type type; |
| 343 | struct xnn_ukernel ukernel; |
| 344 | |
| 345 | struct compute_parameters compute; |
| 346 | struct compute_parameters compute2; |
| 347 | union { |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 348 | struct argmax_pooling_context argmax_pooling; |
| 349 | struct average_pooling_context average_pooling; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 350 | struct channel_shuffle_context channel_shuffle; |
Marat Dukhan | 1f29b80 | 2020-05-15 23:46:39 -0700 | [diff] [blame] | 351 | struct conv2d_context conv2d; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 352 | struct dwconv2d_context dwconv2d; |
| 353 | struct dwconv_context dwconv; |
Marat Dukhan | ad71b9a | 2020-11-20 00:01:51 -0800 | [diff] [blame] | 354 | struct depthtospace2d_chw2hwc_context depthtospace2d_chw; |
Marat Dukhan | 0e52117 | 2020-11-25 13:10:04 -0800 | [diff] [blame] | 355 | struct depthtospace2d_hwc_context depthtospace2d_hwc; |
Marat Dukhan | ca2733c | 2019-11-15 23:21:17 -0800 | [diff] [blame] | 356 | struct elementwise_binary_context elementwise_binary; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 357 | struct gemm_context gemm; |
Marat Dukhan | efc47b8 | 2019-11-18 09:25:38 -0800 | [diff] [blame] | 358 | struct global_average_pooling_nwc_context global_average_pooling_nwc; |
| 359 | struct global_average_pooling_ncw_context global_average_pooling_ncw; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 360 | struct igemm_context igemm; |
| 361 | struct lut_contiguous_context lut_contiguous; |
| 362 | struct lut_strided_context lut_strided; |
| 363 | struct max_pooling_context max_pooling; |
Marat Dukhan | 4662b19 | 2020-05-21 15:52:03 -0700 | [diff] [blame] | 364 | struct pad_context pad; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 365 | struct pixelwise_average_pooling_context pixelwise_average_pooling; |
| 366 | struct prelu_context prelu; |
Marat Dukhan | 6972249 | 2019-11-11 19:55:50 -0800 | [diff] [blame] | 367 | struct resize_bilinear_context resize_bilinear; |
Artsiom Ablavatski | 9791810 | 2020-10-27 15:52:59 -0700 | [diff] [blame] | 368 | struct resize_bilinear_chw_context resize_bilinear_chw; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 369 | struct spmm_context spmm; |
| 370 | struct subconv_context subconv; |
Marat Dukhan | 2995427 | 2020-02-13 17:56:11 -0800 | [diff] [blame] | 371 | struct subgemm_context subgemm; |
Marat Dukhan | fd8e689 | 2020-01-27 15:25:25 -0800 | [diff] [blame] | 372 | struct f32_three_pass_softmax_context f32_three_pass_softmax; |
| 373 | struct u8_softmax_context u8_softmax; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 374 | struct univector_contiguous_context univector_contiguous; |
| 375 | struct univector_strided_context univector_strided; |
| 376 | struct unpooling_context unpooling; |
| 377 | struct vmulcaddc_context vmulcaddc; |
| 378 | } context; |
| 379 | |
| 380 | enum xnn_run_state state; |
Marat Dukhan | 9cbaa63 | 2020-11-24 21:28:50 -0800 | [diff] [blame] | 381 | }; |