blob: 06cd2ffeb14c34933391dd8c4607bfa562071285 [file] [log] [blame]
XNNPACK Teamb455b122019-09-27 18:10:33 -07001// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2019 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8
9#pragma once
10
11#include <stdbool.h>
12#include <stddef.h>
13#include <stdint.h>
14
15#include <pthreadpool.h>
16
17#ifdef __cplusplus
18extern "C" {
19#endif
20
Marat Dukhan5609a082019-10-07 10:56:58 -070021/// The number of bytes XNNPACK may read beyond array bounds.
XNNPACK Team965272b2020-10-23 21:10:15 -070022/// The caller must allocate at least this many extra bytes after the tensor data passed to XNNPACK.
Marat Dukhan5609a082019-10-07 10:56:58 -070023///
24/// Note: XNNPACK reads, but never writes beyond array bounds.
XNNPACK Teamb455b122019-09-27 18:10:33 -070025#define XNN_EXTRA_BYTES 16
26
Marat Dukhanca2733c2019-11-15 23:21:17 -080027/// Maximum number of dimensions in tensor shape.
Marat Dukhanfc2b96e2019-12-03 12:04:04 -080028#define XNN_MAX_TENSOR_DIMS 6
Marat Dukhanca2733c2019-11-15 23:21:17 -080029
Marat Dukhan7332e832020-12-06 23:26:11 -080030/// Allow sparse inference in a Runtime.
31///
32/// Note: this flag forces XNNPACK to consider sparse inference, but does not guarantee it.
33#define XNN_FLAG_SPARSE_INFERENCE 0x00000001
34
Marat Dukhan5609a082019-10-07 10:56:58 -070035/// The convolution operator represents a depthwise convolution, and uses HWGo layout for filters.
Marat Dukhandd69f0b2019-10-04 19:40:03 -070036#define XNN_FLAG_DEPTHWISE_CONVOLUTION 0x00000001
XNNPACK Teamb455b122019-09-27 18:10:33 -070037
Marat Dukhanc4f0ff92019-12-03 14:59:08 -080038/// Assume transposed weights in a fully connected operator.
39#define XNN_FLAG_TRANSPOSE_WEIGHTS 0x00000001
40
Marat Dukhan5609a082019-10-07 10:56:58 -070041/// The operator assumes NHWC layout for the input, regardless of the output layout.
XNNPACK Teamb455b122019-09-27 18:10:33 -070042#define XNN_FLAG_INPUT_NHWC 0x00000002
43
Marat Dukhan8440fde2019-10-24 12:46:13 -070044/// Match "SAME" padding in TensorFlow. Exact padding values are computed dynamically depending on input size.
45#define XNN_FLAG_TENSORFLOW_SAME_PADDING 0x00000004
46
Marat Dukhan853bb7a2021-04-15 15:52:25 -070047/// Implicitly flatten and reshape input of a Fully Connected operator into a 2D tensor.
Marat Dukhan38c07ec2020-04-23 16:44:32 -070048#define XNN_FLAG_TENSORFLOW_RESHAPE_2D 0x00000004
49
Marat Dukhan69722492019-11-11 19:55:50 -080050/// Match behaviour of TensorFlow 1.x.
51#define XNN_FLAG_TENSORFLOW_LEGACY_MODE 0x00000004
52
53/// Align corners of input and output images in resize operations.
54#define XNN_FLAG_ALIGN_CORNERS 0x00000008
55
Marat Dukhan942359e2021-06-09 00:38:56 -070056/// Yield worker threads of the thread pool to the system scheduler after the inference.
57#define XNN_FLAG_YIELD_WORKERS 0x00000010
58
Marat Dukhan5609a082019-10-07 10:56:58 -070059/// Status code for any XNNPACK function call.
 ///
 /// xnn_status_success is zero; every other enumerator is a distinct non-zero value.
XNNPACK Teamb455b122019-09-27 18:10:33 -070060enum xnn_status {
Marat Dukhan5609a082019-10-07 10:56:58 -070061 /// The call succeeded, and all output arguments now contain valid data.
XNNPACK Teamb455b122019-09-27 18:10:33 -070062 xnn_status_success = 0,
 /// NOTE(review): presumably reported when XNNPACK is used before a successful xnn_initialize call - confirm.
63 xnn_status_uninitialized = 1,
 /// NOTE(review): presumably reported when an argument value is not valid for the call - confirm.
64 xnn_status_invalid_parameter = 2,
 /// NOTE(review): presumably reported when an object is not in a state that permits the call - confirm.
65 xnn_status_invalid_state = 3,
 /// NOTE(review): presumably reported for a valid argument value that the implementation does not support - confirm.
66 xnn_status_unsupported_parameter = 4,
 /// Reported by xnn_initialize when the host processor does not satisfy the minimum hardware requirements for
 /// XNNPACK (e.g. x86 processors without SSE2, or 32-bit ARM processors without the NEON SIMD extension).
67 xnn_status_unsupported_hardware = 5,
 /// Reported by xnn_initialize when initialization fails due to an out-of-memory condition.
 /// NOTE(review): presumably also reported by other calls that fail to allocate memory - confirm.
68 xnn_status_out_of_memory = 6,
69};
70
/// Set of user-provided memory management callbacks.
///
/// A pointer to this structure is passed to xnn_initialize; when that pointer is NULL, system-provided memory
/// management functions (e.g. malloc/free) are used instead. The @a context member is handed unchanged to every
/// callback as its first argument.
Marat Dukhan04f03be2019-11-19 12:36:47 -080071struct xnn_allocator {
72 /// User-specified pointer that will be passed as-is to all functions in this structure.
73 void* context;
74 /// Pointer to a function to be called for general memory allocation.
75 ///
76 /// @param context - The user-specified pointer from xnn_allocator structure.
77 /// @param size - The size of the memory block to allocate, in bytes.
78 ///
79 /// @returns Pointer to the allocated memory block of at least @a size bytes.
80 /// If allocation fails, the function must return NULL.
81 void* (*allocate)(void* context, size_t size);
82 /// Pointer to a function to be called for general memory re-allocation, i.e. to increase or shrink a previously
83 /// allocated memory block. The content of the old memory block is copied to the new memory block.
84 ///
85 /// @param context - The user-specified pointer from xnn_allocator structure.
86 /// @param pointer - Pointer to a memory block allocated by @ref allocate or @ref reallocate functions. Can be NULL.
87 /// If the pointer is NULL, the @ref reallocate call is equivalent to an @ref allocate call.
88 /// @param size - The new size of the memory block to allocate, in bytes.
89 ///
90 /// @returns Pointer to the newly allocated memory block of at least @a size bytes with the content of the previous
91 /// memory block.
92 /// If allocation fails, the function must return NULL, but must not release the previous memory block.
93 void* (*reallocate)(void* context, void* pointer, size_t size);
94 /// Pointer to a function to be called for general memory de-allocation.
95 ///
96 /// @param context - The user-specified pointer from xnn_allocator structure.
97 /// @param pointer - Pointer to a memory block allocated by @ref allocate or @ref reallocate functions. Can be NULL.
98 /// If the pointer is NULL, the @ref deallocate call is a no-op.
99 void (*deallocate)(void* context, void* pointer);
100 /// Pointer to a function to be called for aligned memory allocation.
101 ///
102 /// @param context - The user-specified pointer from xnn_allocator structure.
103 /// @param alignment - The alignment of the memory block to allocate, in bytes. Alignment is always a power-of-2.
104 /// @param size - The size of the memory block to allocate, in bytes.
105 ///
106 /// @returns Pointer to the allocated memory block of at least @a size bytes.
107 /// If allocation fails, the function must return NULL.
108 void* (*aligned_allocate)(void* context, size_t alignment, size_t size);
109 /// Pointer to a function to be called for aligned memory de-allocation.
110 ///
111 /// @param context - The user-specified pointer from xnn_allocator structure.
112 /// @param pointer - Pointer to a memory block allocated by @ref aligned_allocate function. Can be NULL.
113 /// If the pointer is NULL, the @ref aligned_deallocate call is a no-op.
114 void (*aligned_deallocate)(void* context, void* pointer);
115};
116
Marat Dukhan5609a082019-10-07 10:56:58 -0700117/// Initialize XNNPACK library.
118///
Marat Dukhan853bb7a2021-04-15 15:52:25 -0700119/// XNNPACK must be successfully initialized before use. During initialization, XNNPACK populates internal structures
120/// depending on the host processor. Initialization can be time-consuming.
Marat Dukhan5609a082019-10-07 10:56:58 -0700121///
Marat Dukhan04f03be2019-11-19 12:36:47 -0800122/// @param[in] allocator - structure with function pointers to be used for memory allocation and de-allocation.
123/// If this argument is NULL, system-provided memory management functions (e.g. malloc/free)
124/// will be used.
125///
slowy07ab1127f2021-07-27 08:23:22 +0700126/// @retval xnn_status_success - XNNPACK is successfully initialized and ready to use.
Marat Dukhan5609a082019-10-07 10:56:58 -0700127/// @retval xnn_status_out_of_memory - initialization failed due to out-of-memory condition.
128/// @retval xnn_status_unsupported_hardware - initialization failed because the host processor does not satisfy the
129/// minimum hardware requirements for XNNPACK. E.g. this may happen on x86
130/// processors without SSE2 extension, or on 32-bit ARM processors without
131/// the NEON SIMD extension.
Marat Dukhan04f03be2019-11-19 12:36:47 -0800132enum xnn_status xnn_initialize(const struct xnn_allocator* allocator);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700133
Marat Dukhan5609a082019-10-07 10:56:58 -0700134/// Deinitialize XNNPACK library.
135///
136/// To avoid memory and resource leaks, users must call xnn_deinitialize once for each successful xnn_initialize call.
137///
138/// @retval xnn_status_success - deinitialization call succeeded.
XNNPACK Teamb455b122019-09-27 18:10:33 -0700139enum xnn_status xnn_deinitialize(void);
140
Marat Dukhandd2b5882020-02-06 15:12:26 -0800141/// Subgraph is an abstract representation of a neural network model.
142/// Subgraph objects are used to define Values (tensors) and Nodes (operators) comprising the model.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800143typedef struct xnn_subgraph* xnn_subgraph_t;
144
Marat Dukhandd2b5882020-02-06 15:12:26 -0800145/// Create an empty Subgraph object.
146///
147/// @param external_value_ids - number of Value IDs to reserve for communication with external graph representation.
148/// The Subgraph object would avoid creating internal Value IDs in the
149/// [0, external_value_ids-1] range.
150/// @param flags - binary features of the subgraph. No supported flags are currently defined.
151/// @param subgraph_out - pointer to the variable that will be initialized with a handle to the Subgraph object upon
152/// successful return.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800153enum xnn_status xnn_create_subgraph(
154 uint32_t external_value_ids,
155 uint32_t flags,
156 xnn_subgraph_t* subgraph_out);
157
Marat Dukhandd2b5882020-02-06 15:12:26 -0800158/// Destroy a Subgraph object, as well as Values, and Nodes associated with the subgraph.
159///
160/// @param subgraph - the Subgraph object to destroy.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800161enum xnn_status xnn_delete_subgraph(
162 xnn_subgraph_t subgraph);
163
/// Flags accepted in the @a flags argument of xnn_define_tensor_value, xnn_define_quantized_tensor_value, and
/// xnn_define_channelwise_quantized_tensor_value. The two flags occupy distinct bits and may be combined.
/// NOTE(review): presumably they mark a Value as an input / output exchanged with the caller - confirm.
164#define XNN_VALUE_FLAG_EXTERNAL_INPUT 0x00000001
165#define XNN_VALUE_FLAG_EXTERNAL_OUTPUT 0x00000002
166
/// Sentinel Value ID (the largest uint32_t value).
/// Passed as @a external_id to the tensor Value definition functions, it requests creation of an internal ID.
/// Passed as @a bias_id to the 2D Deconvolution and 2D Depthwise Convolution Nodes, it denotes a Node without a bias.
167#define XNN_INVALID_VALUE_ID UINT32_MAX
168
Marat Dukhandd2b5882020-02-06 15:12:26 -0800169/// Type of elements in a Value object.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800170enum xnn_datatype {
Marat Dukhandd2b5882020-02-06 15:12:26 -0800171 /// Invalid data type. Valid Values never have this datatype.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800172 xnn_datatype_invalid = 0,
Marat Dukhandd2b5882020-02-06 15:12:26 -0800173 /// IEEE754 single-precision floating-point.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800174 xnn_datatype_fp32 = 1,
Marat Dukhandd2b5882020-02-06 15:12:26 -0800175 /// IEEE754 half-precision floating-point.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800176 xnn_datatype_fp16 = 2,
Marat Dukhan30757192021-03-29 18:19:13 -0700177 /// Quantized 8-bit signed integer with shared per-Value quantization parameters.
Marat Dukhan43ebc052021-03-29 17:49:52 -0700178 xnn_datatype_qint8 = 3,
Marat Dukhan8c8c1592021-07-13 13:59:02 -0700179 /// Quantized 8-bit unsigned integer with shared per-Value quantization parameters.
180 xnn_datatype_quint8 = 4,
Marat Dukhan30757192021-03-29 18:19:13 -0700181 /// Quantized 32-bit signed integer with shared per-Value quantization parameters.
Marat Dukhan8c8c1592021-07-13 13:59:02 -0700182 xnn_datatype_qint32 = 5,
Marat Dukhana11a1e82021-06-24 13:10:13 -0700183 /// Quantized 8-bit signed integer with shared per-channel quantization parameters.
Marat Dukhan8c8c1592021-07-13 13:59:02 -0700184 xnn_datatype_qcint8 = 6,
Marat Dukhana11a1e82021-06-24 13:10:13 -0700185 /// Quantized 32-bit signed integer with shared per-channel quantization parameters.
Marat Dukhan8c8c1592021-07-13 13:59:02 -0700186 xnn_datatype_qcint32 = 7,
Marat Dukhan1d75a542020-02-03 12:23:01 -0800187};
188
Marat Dukhandd2b5882020-02-06 15:12:26 -0800189/// Define a tensor-type Value and add it to a Subgraph.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800190///
Marat Dukhandd2b5882020-02-06 15:12:26 -0800191/// @param subgraph - a Subgraph object that will own the created Value.
192/// @param datatype - type of the tensor elements.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800193/// @param num_dims - number of dimensions in the shape.
194/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
Marat Dukhandd2b5882020-02-06 15:12:26 -0800195/// XNNPACK does not keep any pointers to this array after the function returns.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800196/// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized,
Marat Dukhandd2b5882020-02-06 15:12:26 -0800197/// this pointer must be NULL. If non-NULL, the life-time of the static data must exceed the life-time
198/// of the Subgraph object, and of any Runtime objects created from the Subgraph.
199/// @param external_id - external ID for the Value. The ID must be within the range of reserved Value IDs specified on
200/// the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
201/// created for the Value.
202/// @param flags - binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
203/// and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
204/// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a
205/// valid @a external_id was provided, the variable will be initialized with the @a external_id value.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800206enum xnn_status xnn_define_tensor_value(
207 xnn_subgraph_t subgraph,
208 enum xnn_datatype datatype,
209 size_t num_dims,
210 const size_t* dims,
211 const void* data,
212 uint32_t external_id,
213 uint32_t flags,
214 uint32_t* id_out);
215
Marat Dukhan43ebc052021-03-29 17:49:52 -0700216/// Define a quantized tensor-type Value and add it to a Subgraph.
217///
218/// @param subgraph - a Subgraph object that will own the created Value.
219/// @param datatype - type of the tensor elements.
Marat Dukhan30757192021-03-29 18:19:13 -0700220/// @param zero_point - offset from zero to subtract from the quantized elements in the Value.
221/// @param scale - multiplication factor to convert quantized elements to real representation.
Marat Dukhan43ebc052021-03-29 17:49:52 -0700222/// @param num_dims - number of dimensions in the shape.
223/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
224/// XNNPACK does not keep any pointers to this array after the function returns.
225/// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized,
226/// this pointer must be NULL. If non-NULL, the life-time of the static data must exceed the life-time
227/// of the Subgraph object, and of any Runtime objects created from the Subgraph.
228/// @param external_id - external ID for the Value. The ID must be within the range of reserved Value IDs specified on
229/// the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
230/// created for the Value.
231/// @param flags - binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
232/// and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
233/// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a
234/// valid @a external_id was provided, the variable will be initialized with the @a external_id value.
235enum xnn_status xnn_define_quantized_tensor_value(
236 xnn_subgraph_t subgraph,
237 enum xnn_datatype datatype,
238 int32_t zero_point,
239 float scale,
240 size_t num_dims,
241 const size_t* dims,
242 const void* data,
243 uint32_t external_id,
244 uint32_t flags,
245 uint32_t* id_out);
246
Marat Dukhana11a1e82021-06-24 13:10:13 -0700247/// Define a channelwise quantized tensor-type Value and add it to a Subgraph.
248///
249/// @param subgraph - a Subgraph object that will own the created Value.
250/// @param datatype - type of the tensor elements.
251/// @param scale - per-channel multiplication factors to convert quantized elements to real representation.
252/// @param num_dims - number of dimensions in the shape.
253/// @param channel_dim - index of the channel dimension in the tensor with per-channel quantization parameters.
254/// Typically this is the first dimension (dimension #0) of the filter tensors in the Convolution,
255/// Deconvolution, and Fully Connected operators and the last dimension of the filter tensors in
256/// the Depthwise Convolution operators.
257/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
258/// XNNPACK does not keep any pointers to this array after the function returns.
259/// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized,
260/// this pointer must be NULL. If non-NULL, the life-time of the static data must exceed the life-time
261/// of the Subgraph object, and of any Runtime objects created from the Subgraph.
262/// @param external_id - external ID for the Value. The ID must be within the range of reserved Value IDs specified on
263/// the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
264/// created for the Value.
265/// @param flags - binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
266/// and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
267/// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a
268/// valid @a external_id was provided, the variable will be initialized with the @a external_id value.
269enum xnn_status xnn_define_channelwise_quantized_tensor_value(
270 xnn_subgraph_t subgraph,
271 enum xnn_datatype datatype,
272 const float* scale,
273 size_t num_dims,
274 size_t channel_dim,
275 const size_t* dims,
276 const void* data,
277 uint32_t external_id,
278 uint32_t flags,
279 uint32_t* id_out);
280
Marat Dukhandd2b5882020-02-06 15:12:26 -0800281/// Define a 2D Convolution Node and add it to a Subgraph.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800282///
Marat Dukhandd2b5882020-02-06 15:12:26 -0800283/// @param subgraph - a Subgraph object that will own the created Node.
Marat Dukhan15d1f512020-02-24 08:06:33 -0800284/// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING
285/// flag is specified.
286/// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if
287/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
288/// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if
289/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
290/// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if
291/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800292/// @param kernel_height - kernel (filter) height.
293/// @param kernel_width - kernel (filter) width.
294/// @param subsampling_height - height of subsampling region for convolution output (convolution height stride).
295/// @param subsampling_width - width of subsampling region for convolution output (convolution width stride).
296/// @param dilation_height - dilation of kernel elements along the height dimension.
297/// @param dilation_width - dilation of kernel elements along the width dimension.
298/// @param groups - number of convolution groups.
299/// @param group_input_channels - number of input channels per group.
300/// @param group_output_channels - number of output channels per group.
301/// @param output_min - lower bound for clipping output values.
302/// @param output_max - upper bound for clipping output values.
Marat Dukhandd2b5882020-02-06 15:12:26 -0800303/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
304/// with [N, IH, IW, groups * group_input_channels] dimensions.
305/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 4D tensor defined in the @a subgraph
306/// with [groups * group_output_channels, kernel_height, kernel_width, group_input_channels]
307/// dimensions.
308/// @param bias_id - Value ID for the bias tensor. The bias tensor must be a 1D tensor defined in the @a subgraph with
309/// [groups * group_output_channels] dimensions.
310/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
311/// with [N, OH, OW, groups * group_output_channels] dimensions.
312/// @param flags - binary features of the 2D Convolution Node. The only currently supported value is
313/// XNN_FLAG_TENSORFLOW_SAME_PADDING.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800314enum xnn_status xnn_define_convolution_2d(
315 xnn_subgraph_t subgraph,
316 uint32_t input_padding_top,
317 uint32_t input_padding_right,
318 uint32_t input_padding_bottom,
319 uint32_t input_padding_left,
320 uint32_t kernel_height,
321 uint32_t kernel_width,
322 uint32_t subsampling_height,
323 uint32_t subsampling_width,
324 uint32_t dilation_height,
325 uint32_t dilation_width,
326 uint32_t groups,
327 size_t group_input_channels,
328 size_t group_output_channels,
329 float output_min,
330 float output_max,
331 uint32_t input_id,
332 uint32_t filter_id,
333 uint32_t bias_id,
334 uint32_t output_id,
335 uint32_t flags);
336
Marat Dukhanf5870842020-04-27 18:19:54 -0700337/// Define a 2D Deconvolution (Transposed Convolution) Node and add it to a Subgraph.
338///
339/// @param subgraph - a Subgraph object that will own the created Node.
340/// @param padding_top - implicit padding above 2D output data.
341/// @param padding_right - implicit padding to the right of 2D output data.
342/// @param padding_bottom - implicit padding below 2D output data.
343/// @param padding_left - implicit padding to the left of 2D output data.
344/// @param adjustment_height - additional elements in the bottom of the 2D output data.
345/// @param adjustment_width - additional elements to the right of the 2D output data.
346/// @param kernel_height - kernel (filter) height.
347/// @param kernel_width - kernel (filter) width.
348/// @param upsampling_height - height of upsampling region for deconvolution input (deconvolution height stride).
349/// @param upsampling_width - width of upsampling region for deconvolution input (deconvolution width stride).
350/// @param dilation_height - dilation of kernel elements along the height dimension.
351/// @param dilation_width - dilation of kernel elements along the width dimension.
352/// @param groups - number of convolution groups.
353/// @param group_input_channels - number of input channels per group.
354/// @param group_output_channels - number of output channels per group.
355/// @param output_min - lower bound for clipping output values.
356/// @param output_max - upper bound for clipping output values.
357/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
358/// with [N, IH, IW, groups * group_input_channels] dimensions.
359/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 4D tensor defined in the @a subgraph
360/// with [groups * group_output_channels, kernel_height, kernel_width, group_input_channels]
361/// dimensions.
Marat Dukhana9992252021-04-15 16:47:24 -0700362/// @param bias_id - Value ID for the bias tensor, or XNN_INVALID_VALUE_ID for a 2D Deconvolution Node without a bias. If
363/// present, the bias tensor must be a 1D tensor defined in the @a subgraph with
Marat Dukhanf5870842020-04-27 18:19:54 -0700364/// [groups * group_output_channels] dimensions.
365/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
366/// with [N, OH, OW, groups * group_output_channels] dimensions.
367/// @param flags - binary features of the 2D Deconvolution Node. No supported flags are currently defined.
368enum xnn_status xnn_define_deconvolution_2d(
369 xnn_subgraph_t subgraph,
370 uint32_t padding_top,
371 uint32_t padding_right,
372 uint32_t padding_bottom,
373 uint32_t padding_left,
374 uint32_t adjustment_height,
375 uint32_t adjustment_width,
376 uint32_t kernel_height,
377 uint32_t kernel_width,
378 uint32_t upsampling_height,
379 uint32_t upsampling_width,
380 uint32_t dilation_height,
381 uint32_t dilation_width,
382 uint32_t groups,
383 size_t group_input_channels,
384 size_t group_output_channels,
385 float output_min,
386 float output_max,
387 uint32_t input_id,
388 uint32_t filter_id,
389 uint32_t bias_id,
390 uint32_t output_id,
391 uint32_t flags);
392
Marat Dukhandd2b5882020-02-06 15:12:26 -0800393/// Define a 2D Depthwise Convolution Node and add it to a Subgraph.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800394///
Marat Dukhandd2b5882020-02-06 15:12:26 -0800395/// @param subgraph - a Subgraph object that will own the created Node.
Marat Dukhan15d1f512020-02-24 08:06:33 -0800396/// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING
397/// flag is specified.
398/// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if
399/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
400/// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if
401/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
402/// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if
403/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800404/// @param kernel_height - kernel (filter) height.
405/// @param kernel_width - kernel (filter) width.
406/// @param subsampling_height - height of subsampling region for convolution output (convolution height stride).
407/// @param subsampling_width - width of subsampling region for convolution output (convolution width stride).
408/// @param dilation_height - dilation of kernel elements along the height dimension.
409/// @param dilation_width - dilation of kernel elements along the width dimension.
410/// @param depth_multiplier - ratio of output channels to input channels.
411/// @param input_channels - number of input channels.
412/// @param output_min - lower bound for clipping output values.
413/// @param output_max - upper bound for clipping output values.
Marat Dukhandd2b5882020-02-06 15:12:26 -0800414/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
415/// with [N, IH, IW, input_channels] dimensions.
416/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 4D tensor defined in the @a subgraph
417/// with [1, kernel_height, kernel_width, input_channels * depth_multiplier] dimensions.
Marat Dukhana9992252021-04-15 16:47:24 -0700418/// @param bias_id - Value ID for the bias tensor, or XNN_INVALID_VALUE_ID for a 2D Depthwise Convolution Node without
419/// a bias. If present, the bias tensor must be a 1D tensor defined in the @a subgraph with
Marat Dukhandd2b5882020-02-06 15:12:26 -0800420/// [input_channels * depth_multiplier] dimensions.
421/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
422/// with [N, OH, OW, input_channels * depth_multiplier] dimensions.
423/// @param flags - binary features of the 2D Depthwise Convolution Node. The only currently supported value is
424/// XNN_FLAG_TENSORFLOW_SAME_PADDING.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800425enum xnn_status xnn_define_depthwise_convolution_2d(
426 xnn_subgraph_t subgraph,
427 uint32_t input_padding_top,
428 uint32_t input_padding_right,
429 uint32_t input_padding_bottom,
430 uint32_t input_padding_left,
431 uint32_t kernel_height,
432 uint32_t kernel_width,
433 uint32_t subsampling_height,
434 uint32_t subsampling_width,
435 uint32_t dilation_height,
436 uint32_t dilation_width,
437 uint32_t depth_multiplier,
438 size_t input_channels,
439 float output_min,
440 float output_max,
441 uint32_t input_id,
442 uint32_t filter_id,
443 uint32_t bias_id,
444 uint32_t output_id,
445 uint32_t flags);
446
Marat Dukhan853bb7a2021-04-15 15:52:25 -0700447/// Define a Depth To Space Node and add it to a Subgraph.
Artsiom Ablavatskibbe85062020-11-05 14:07:37 -0800448///
Marat Dukhan853bb7a2021-04-15 15:52:25 -0700449/// The Depth To Space Node rearranges data from depth into blocks of spatial data (a reverse transform to
450/// Space To Depth). For a given input pixel, an output square of pixels with side @a block_size is formed from values
451/// in the corresponding number of its channels. The output depth is therefore @a block_size x @a block_size times
452/// smaller than that of the input.
Artsiom Ablavatskibbe85062020-11-05 14:07:37 -0800453///
454/// @param subgraph - a Subgraph object that will own the created Node.
Marat Dukhan853bb7a2021-04-15 15:52:25 -0700455/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
456/// with [N, IH, IW, OC * block_size * block_size] dimensions.
457/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
458/// with [N, IH * block_size, IW * block_size, OC] dimensions.
Artsiom Ablavatskibbe85062020-11-05 14:07:37 -0800459/// @param block_size - the size of the spatial block.
Marat Dukhan853bb7a2021-04-15 15:52:25 -0700460/// @param flags - binary features of the Depth To Space Node. No supported flags are currently defined.
Artsiom Ablavatskibbe85062020-11-05 14:07:37 -0800461enum xnn_status xnn_define_depth_to_space(
462 xnn_subgraph_t subgraph,
463 uint32_t input_id,
464 uint32_t output_id,
465 uint32_t block_size,
466 uint32_t flags);
467
Marat Dukhana059b7d2020-06-11 11:41:27 -0700468/// Define a 2D Global Average Pooling Node and add it to a Subgraph.
469///
470/// @param subgraph - a Subgraph object that will own the created Node.
471/// @param output_min - lower bound for clipping output values.
472/// @param output_max - upper bound for clipping output values.
Marat Dukhan853bb7a2021-04-15 15:52:25 -0700473/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
474/// with [N, H, W, C] dimensions
475/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
476/// with [N, 1, 1, C] dimensions.
477/// @param flags - binary features of the 2D Global Average Pooling Node. No supported flags are currently defined.
Marat Dukhana059b7d2020-06-11 11:41:27 -0700478enum xnn_status xnn_define_global_average_pooling_2d(
479 xnn_subgraph_t subgraph,
480 float output_min,
481 float output_max,
482 uint32_t input_id,
483 uint32_t output_id,
484 uint32_t flags);
485
Marat Dukhan21d3bd62020-02-29 00:39:39 -0800486/// Define a 2D Average Pooling Node and add it to a Subgraph.
487///
488/// @param subgraph - a Subgraph object that will own the created Node.
489/// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING
490/// flag is specified.
491/// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if
492/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
493/// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if
494/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
495/// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if
496/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
497/// @param pooling_height - pooling (kernel) height.
498/// @param pooling_width - pooling (kernel) width.
499/// @param stride_height - displacing of the pooling window in the vertical dimension of the input pixels corresponding
500/// to vertically adjacent output pixels.
501/// @param stride_width - displacing of the pooling window in the horizontal dimension of the input pixels corresponding
502/// to horizontally adjacent output pixels.
503/// @param output_min - lower bound for clipping output values.
504/// @param output_max - upper bound for clipping output values.
505/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
506/// with [N, IH, IW, channels] dimensions.
507/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
508/// with [N, OH, OW, channels] dimensions.
509/// @param flags - binary features of the 2D Average Pooling Node. The only currently supported value is
510/// XNN_FLAG_TENSORFLOW_SAME_PADDING.
511enum xnn_status xnn_define_average_pooling_2d(
512 xnn_subgraph_t subgraph,
513 uint32_t input_padding_top,
514 uint32_t input_padding_right,
515 uint32_t input_padding_bottom,
516 uint32_t input_padding_left,
517 uint32_t pooling_height,
518 uint32_t pooling_width,
519 uint32_t stride_height,
520 uint32_t stride_width,
521 float output_min,
522 float output_max,
523 uint32_t input_id,
524 uint32_t output_id,
525 uint32_t flags);
526
Marat Dukhan38c07ec2020-04-23 16:44:32 -0700527/// Define a Fully Connected Node and add it to a Subgraph.
528///
529/// @param subgraph - a Subgraph object that will own the created Node.
530/// @param output_min - lower bound for clipping output values.
531/// @param output_max - upper bound for clipping output values.
Marat Dukhan853bb7a2021-04-15 15:52:25 -0700532/// @param input_id - Value ID for the input tensor. The input tensor must be an N-dimensional tensor defined in the
533/// @a subgraph. If XNN_FLAG_TENSORFLOW_RESHAPE_2D is not specified, the input tensor must be at least
534/// 1D and its last dimension must match the input_channels dimension of the filter tensor. In
535/// particular, if input is a 2D tensor, it must have [batch_size, input_channels] dimensions.
536/// If XNN_FLAG_TENSORFLOW_RESHAPE_2D is specified, the number of elements in the input tensor must be
537/// divisible by the input_channels. The tensor will be first flattened into a 1D tensor of
538/// [num_input_elements] dimensions, then reshaped into a 2D tensor of
539/// [num_input_elements / input_channels, input_channels] dimensions where num_input_elements is the
540/// total number of elements in the input tensor.
541/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 2D tensor defined in the @a subgraph.
542/// If the XNN_FLAG_TRANSPOSE_WEIGHTS flag is not specified, the filter tensor must have
543/// [output_channels, input_channels] dimensions. If the XNN_FLAG_TRANSPOSE_WEIGHTS flag is
544/// specified, the filter tensor must have [input_channels, output_channels] dimensions.
545/// @param bias_id - Value ID for the bias tensor, or XNN_INVALID_VALUE_ID for a Fully Connected Node without a bias.
546/// If present, the bias tensor must be a 1D tensor defined in the @a subgraph with [output_channels]
547/// dimensions.
548/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph.
549/// If XNN_FLAG_TENSORFLOW_RESHAPE_2D is not specified, the output tensor must have the same
550/// dimensionality as the input tensor, all its dimensions but the last one must match the
551/// corresponding dimensions of the input tensor, and the last dimension of the output tensor must
552/// match the output_channels dimension of the filter tensor. In particular, if input is a 2D tensor,
553/// output must be a 2D tensor of [batch_size, output_channels] dimensions.
554/// If XNN_FLAG_TENSORFLOW_RESHAPE_2D is specified, output must be a 2D tensor of
555/// [num_input_elements / input_channels, output_channels] dimensions where num_input_elements is the
556/// total number of elements in the input tensor.
557/// @param flags - binary features of the Fully Connected Node. The only currently supported values are
558/// XNN_FLAG_TENSORFLOW_RESHAPE_2D and XNN_FLAG_TRANSPOSE_WEIGHTS.
Marat Dukhana9992252021-04-15 16:47:24 -0700559enum xnn_status xnn_define_fully_connected(
560 xnn_subgraph_t subgraph,
561 float output_min,
562 float output_max,
563 uint32_t input_id,
564 uint32_t filter_id,
565 uint32_t bias_id,
566 uint32_t output_id,
567 uint32_t flags);
Marat Dukhan38c07ec2020-04-23 16:44:32 -0700568
Marat Dukhan21d3bd62020-02-29 00:39:39 -0800569/// Define a 2D Max Pooling Node and add it to a Subgraph.
570///
571/// @param subgraph - a Subgraph object that will own the created Node.
572/// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING
573/// flag is specified.
574/// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if
575/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
576/// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if
577/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
578/// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if
579/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
580/// @param pooling_height - pooling (kernel) height.
581/// @param pooling_width - pooling (kernel) width.
582/// @param stride_height - displacing of the pooling window in the vertical dimension of the input pixels corresponding
583/// to vertically adjacent output pixels.
584/// @param stride_width - displacing of the pooling window in the horizontal dimension of the input pixels corresponding
585/// to horizontally adjacent output pixels.
586/// @param dilation_height - dilation of pooling elements along the height dimension.
587/// @param dilation_width - dilation of pooling elements along the width dimension.
588/// @param output_min - lower bound for clipping output values.
589/// @param output_max - upper bound for clipping output values.
590/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
591/// with [N, IH, IW, channels] dimensions.
592/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
593/// with [N, OH, OW, channels] dimensions.
594/// @param flags - binary features of the 2D Max Pooling Node. The only currently supported value is
595/// XNN_FLAG_TENSORFLOW_SAME_PADDING.
596enum xnn_status xnn_define_max_pooling_2d(
597 xnn_subgraph_t subgraph,
598 uint32_t input_padding_top,
599 uint32_t input_padding_right,
600 uint32_t input_padding_bottom,
601 uint32_t input_padding_left,
602 uint32_t pooling_height,
603 uint32_t pooling_width,
604 uint32_t stride_height,
605 uint32_t stride_width,
606 uint32_t dilation_height,
607 uint32_t dilation_width,
608 float output_min,
609 float output_max,
610 uint32_t input_id,
611 uint32_t output_id,
612 uint32_t flags);
613
Marat Dukhan5cb16e72020-05-05 16:41:57 -0700614/// Define a 2D ArgMax Pooling Node and add it to a Subgraph.
615///
616/// @param subgraph - a Subgraph object that will own the created Node.
617/// @param input_padding_top - implicit zero-padding above 2D input data.
618/// @param input_padding_right - implicit zero-padding to the right of 2D input data.
619/// @param input_padding_bottom - implicit zero-padding below 2D input data.
620/// @param input_padding_left - implicit zero-padding to the left of 2D input data.
621/// @param pooling_height - pooling (kernel) height. Vertical stride between pooling regions matches this value.
622/// @param pooling_width - pooling (kernel) width. Horizontal stride between pooling regions matches this value.
623/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
624/// with [N, IH, IW, channels] dimensions.
625/// @param output_value_id - Value ID for the output tensor with the maximum values in the pools. The output tensor must
626/// be a 4D tensor defined in the @a subgraph with [N, OH, OW, channels] dimensions.
627/// @param output_index_id - Value ID for the output tensor with the indexes of the maximum values in the pools. The
628/// output tensor must be a 4D tensor defined in the @a subgraph with [N, OH, OW, channels]
629/// dimensions.
630/// @param flags - binary features of the 2D ArgMax Pooling Node. No supported flags are currently defined.
631enum xnn_status xnn_define_argmax_pooling_2d(
632 xnn_subgraph_t subgraph,
633 uint32_t input_padding_top,
634 uint32_t input_padding_right,
635 uint32_t input_padding_bottom,
636 uint32_t input_padding_left,
637 uint32_t pooling_height,
638 uint32_t pooling_width,
639 uint32_t input_id,
640 uint32_t output_value_id,
641 uint32_t output_index_id,
642 uint32_t flags);
643
644/// Define a 2D UnPooling Node and add it to a Subgraph.
645///
646/// @param subgraph - a Subgraph object that will own the created Node.
647/// @param padding_top - implicit padding above 2D output data.
648/// @param padding_right - implicit padding to the right of 2D output data.
649/// @param padding_bottom - implicit padding below 2D output data.
650/// @param padding_left - implicit padding to the left of 2D output data.
651/// @param pooling_height - height of the pooling window.
652/// @param pooling_width - width of the pooling window.
653/// @param input_value_id - Value ID for the input tensor with the max-pooling values to invert. The input value tensor
654/// must be a 4D tensor defined in the @a subgraph with [N, IH, IW, channels] dimensions.
655/// @param input_index_id - Value ID for the input tensor with the indices of the per-pool maximum values produced by
656/// a 2D ArgMax Pooling Node. The input tensor must be a 4D tensor defined in the @a subgraph with
657/// [N, IH, IW, channels] dimensions.
658/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
659/// with [N, OH, OW, channels] dimensions.
660/// @param flags - binary features of the 2D UnPooling Node. No supported flags are currently defined.
661enum xnn_status xnn_define_unpooling_2d(
662 xnn_subgraph_t subgraph,
663 uint32_t padding_top,
664 uint32_t padding_right,
665 uint32_t padding_bottom,
666 uint32_t padding_left,
667 uint32_t pooling_height,
668 uint32_t pooling_width,
669 uint32_t input_value_id,
670 uint32_t input_index_id,
671 uint32_t output_id,
672 uint32_t flags);
673
Marat Dukhan54dcb462020-02-10 11:06:12 -0800674/// Define a 2-Input Add Node and add it to a Subgraph.
675///
676/// The 2-Input Add Node computes elementwise addition of two tensor inputs with numpy broadcasting rules.
677///
678/// @param subgraph - a Subgraph object that will own the created Node.
679/// @param output_min - lower bound for clipping output values.
680/// @param output_max - upper bound for clipping output values.
681/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
682/// the @a subgraph with each dimension either equal to the corresponding dimension of the second
683/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
684/// that dimension.
685/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
686/// the @a subgraph with each dimension either equal to the corresponding dimension of the first
687/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
688/// that dimension.
689/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
690/// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
691/// of the two inputs.
692/// @param flags - binary features of the Add Node. No supported flags are currently defined.
693enum xnn_status xnn_define_add2(
694 xnn_subgraph_t subgraph,
695 float output_min,
696 float output_max,
697 uint32_t input1_id,
698 uint32_t input2_id,
699 uint32_t output_id,
700 uint32_t flags);
701
702/// Define a 2-Input Multiply Node and add it to a Subgraph.
703///
704/// The 2-Input Multiply Node computes elementwise multiplication of two tensor inputs with numpy broadcasting rules.
705///
706/// @param subgraph - a Subgraph object that will own the created Node.
707/// @param output_min - lower bound for clipping output values.
708/// @param output_max - upper bound for clipping output values.
709/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
710/// the @a subgraph with each dimension either equal to the corresponding dimension of the second
711/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
712/// that dimension.
713/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
714/// the @a subgraph with each dimension either equal to the corresponding dimension of the first
715/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
716/// that dimension.
717/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
718/// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
719/// of the two inputs.
720/// @param flags - binary features of the Multiply Node. No supported flags are currently defined.
721enum xnn_status xnn_define_multiply2(
722 xnn_subgraph_t subgraph,
723 float output_min,
724 float output_max,
725 uint32_t input1_id,
726 uint32_t input2_id,
727 uint32_t output_id,
728 uint32_t flags);
729
Marat Dukhan9d3a4592020-06-05 16:52:42 -0700730/// Define a Subtract Node and add it to a Subgraph.
731///
732/// The Subtract Node computes elementwise subtraction of two tensor inputs with numpy broadcasting rules.
733///
734/// @param subgraph - a Subgraph object that will own the created Node.
735/// @param output_min - lower bound for clipping output values.
736/// @param output_max - upper bound for clipping output values.
737/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
738/// the @a subgraph with each dimension either equal to the corresponding dimension of the second
739/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
740/// that dimension.
741/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
742/// the @a subgraph with each dimension either equal to the corresponding dimension of the first
743/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
744/// that dimension.
745/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
746/// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
747/// of the two inputs.
748/// @param flags - binary features of the Subtract Node. No supported flags are currently defined.
749enum xnn_status xnn_define_subtract(
750 xnn_subgraph_t subgraph,
751 float output_min,
752 float output_max,
753 uint32_t input1_id,
754 uint32_t input2_id,
755 uint32_t output_id,
756 uint32_t flags);
757
758/// Define a Divide Node and add it to a Subgraph.
759///
760/// The Divide Node computes elementwise division of two tensor inputs with numpy broadcasting rules.
761///
762/// @param subgraph - a Subgraph object that will own the created Node.
763/// @param output_min - lower bound for clipping output values.
764/// @param output_max - upper bound for clipping output values.
765/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
766/// the @a subgraph with each dimension either equal to the corresponding dimension of the second
767/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
768/// that dimension.
769/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
770/// the @a subgraph with each dimension either equal to the corresponding dimension of the first
771/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
772/// that dimension.
773/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
774/// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
775/// of the two inputs.
776/// @param flags - binary features of the Divide Node. No supported flags are currently defined.
777enum xnn_status xnn_define_divide(
778 xnn_subgraph_t subgraph,
779 float output_min,
780 float output_max,
781 uint32_t input1_id,
782 uint32_t input2_id,
783 uint32_t output_id,
784 uint32_t flags);
785
786/// Define a 2-Input Maximum Node and add it to a Subgraph.
787///
788/// The 2-Input Maximum Node computes elementwise maximum of two tensor inputs with numpy broadcasting rules.
789///
790/// @param subgraph - a Subgraph object that will own the created Node.
791/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
792/// the @a subgraph with each dimension either equal to the corresponding dimension of the second
793/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
794/// that dimension.
795/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
796/// the @a subgraph with each dimension either equal to the corresponding dimension of the first
797/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
798/// that dimension.
799/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
800/// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
801/// of the two inputs.
802/// @param flags - binary features of the Maximum Node. No supported flags are currently defined.
803enum xnn_status xnn_define_maximum2(
804 xnn_subgraph_t subgraph,
805 uint32_t input1_id,
806 uint32_t input2_id,
807 uint32_t output_id,
808 uint32_t flags);
809
810/// Define a 2-Input Minimum Node and add it to a Subgraph.
811///
812/// The 2-Input Minimum Node computes elementwise minimum of two tensor inputs with numpy broadcasting rules.
813///
814/// @param subgraph - a Subgraph object that will own the created Node.
815/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
816/// the @a subgraph with each dimension either equal to the corresponding dimension of the second
817/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
818/// that dimension.
819/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
820/// the @a subgraph with each dimension either equal to the corresponding dimension of the first
821/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
822/// that dimension.
823/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
824/// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
825/// of the two inputs.
826/// @param flags - binary features of the Minimum Node. No supported flags are currently defined.
827enum xnn_status xnn_define_minimum2(
828 xnn_subgraph_t subgraph,
829 uint32_t input1_id,
830 uint32_t input2_id,
831 uint32_t output_id,
832 uint32_t flags);
833
834/// Define a Squared Difference Node and add it to a Subgraph.
835///
836/// The Squared Difference Node computes elementwise squared difference of two tensor inputs with numpy broadcasting
837/// rules.
838///
839/// @param subgraph - a Subgraph object that will own the created Node.
840/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
841/// the @a subgraph with each dimension either equal to the corresponding dimension of the second
842/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
843/// that dimension.
844/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
845/// the @a subgraph with each dimension either equal to the corresponding dimension of the first
846/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
847/// that dimension.
848/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
849/// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
850/// of the two inputs.
851/// @param flags - binary features of the Squared Difference Node. No supported flags are currently defined.
852enum xnn_status xnn_define_squared_difference(
853 xnn_subgraph_t subgraph,
854 uint32_t input1_id,
855 uint32_t input2_id,
856 uint32_t output_id,
857 uint32_t flags);
858
Marat Dukhanab2946c2020-05-21 20:04:13 -0700859/// Define a Constant Pad Node with static padding specification and add it to a Subgraph.
860///
861/// @param subgraph - a Subgraph object that will own the created Node.
862/// @param pre_paddings - number of padding elements to insert before input elements for every dimension. This array
863/// must have as many elements as the number of dimensions in the input tensor.
864/// @param post_paddings - number of padding elements to insert after input elements for every dimension. This array
Marat Dukhan853bb7a2021-04-15 15:52:25 -0700865/// must have as many elements as the number of dimensions in the input tensor.
Marat Dukhanab2946c2020-05-21 20:04:13 -0700866/// @param padding_value - constant value used to initialize padding elements.
867/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
868/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
869/// shape must match the shape of the input tensor with padding.
Marat Dukhand27202d2020-07-09 23:43:40 -0700870/// @param flags - binary features of the Constant Pad Node. No supported flags are currently defined.
Marat Dukhanab2946c2020-05-21 20:04:13 -0700871enum xnn_status xnn_define_static_constant_pad(
872 xnn_subgraph_t subgraph,
873 const size_t* pre_paddings,
874 const size_t* post_paddings,
875 float padding_value,
876 uint32_t input_id,
877 uint32_t output_id,
878 uint32_t flags);
879
Marat Dukhand27202d2020-07-09 23:43:40 -0700880/// Define a Reshape Node with static shape specification and add it to a Subgraph.
881///
882/// @param subgraph - a Subgraph object that will own the created Node.
883/// @param num_dims - number of shape dimensions in the output tensor.
884/// @param new_shape - shape dimensions of the output tensor.
885/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
886/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
887/// shape must match the shape specified by @a new_shape.
888/// @param flags - binary features of the Reshape Node. No supported flags are currently defined.
889enum xnn_status xnn_define_static_reshape(
890 xnn_subgraph_t subgraph,
891 size_t num_dims,
892 const size_t* new_shape,
893 uint32_t input_id,
894 uint32_t output_id,
895 uint32_t flags);
896
Marat Dukhanaff24e22020-07-23 01:43:58 -0700897/// Define a 2D Resize Bilinear Node with static output height & width specification and add it to a Subgraph.
898///
899/// @param subgraph - a Subgraph object that will own the created Node.
900/// @param new_height - height dimension of the output tensor.
901/// @param new_width - width dimension of the output tensor.
902/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
Marat Dukhan853bb7a2021-04-15 15:52:25 -0700903/// with [N, H, W, C] dimensions.
Marat Dukhanaff24e22020-07-23 01:43:58 -0700904/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
905/// with [N, new_height, new_width, C] dimensions.
906/// @param flags - binary features of the 2D Resize Bilinear Node. The only currently supported values are
907/// XNN_FLAG_TENSORFLOW_LEGACY_MODE and XNN_FLAG_ALIGN_CORNERS, which are mutually exclusive.
908enum xnn_status xnn_define_static_resize_bilinear_2d(
909 xnn_subgraph_t subgraph,
910 size_t new_height,
911 size_t new_width,
912 uint32_t input_id,
913 uint32_t output_id,
914 uint32_t flags);
915
Marat Dukhan2fd2ba12020-02-10 13:14:45 -0800916/// Define a PReLU (Parametric ReLU) Node and add it to a Subgraph.
917///
918/// @param subgraph - a Subgraph object that will own the created Node.
919/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
Marat Dukhan853bb7a2021-04-15 15:52:25 -0700920/// with [N, H, W, channels] dimensions.
Marat Dukhan2fd2ba12020-02-10 13:14:45 -0800921/// @param slope_id - Value ID for the slope tensor. The slope tensor must be a 1D tensor defined in the @a subgraph with
922/// [channels] dimensions.
923/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
924/// with [N, H, W, channels] dimensions.
925/// @param flags - binary features of the PReLU Node. No supported flags are currently defined.
926enum xnn_status xnn_define_prelu(
927 xnn_subgraph_t subgraph,
928 uint32_t input_id,
929 uint32_t slope_id,
930 uint32_t output_id,
931 uint32_t flags);
932
Marat Dukhan5fab4092020-06-10 01:28:28 -0700933/// Define an Abs Node and add it to a Subgraph.
934///
935/// @param subgraph - a Subgraph object that will own the created Node.
936/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
937/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
938/// shape must match the shape of the input tensor.
939/// @param flags - binary features of the Abs Node. No supported flags are currently defined.
940enum xnn_status xnn_define_abs(
941 xnn_subgraph_t subgraph,
942 uint32_t input_id,
943 uint32_t output_id,
944 uint32_t flags);
945
946/// Define a Bankers' Rounding Node and add it to a Subgraph.
947///
948/// @param subgraph - a Subgraph object that will own the created Node.
949/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
950/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
951/// shape must match the shape of the input tensor.
952/// @param flags - binary features of the Bankers' Rounding Node. No supported flags are currently defined.
953enum xnn_status xnn_define_bankers_rounding(
954 xnn_subgraph_t subgraph,
955 uint32_t input_id,
956 uint32_t output_id,
957 uint32_t flags);
958
959/// Define a Ceiling Node and add it to a Subgraph.
960///
961/// @param subgraph - a Subgraph object that will own the created Node.
962/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
963/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
964/// shape must match the shape of the input tensor.
965/// @param flags - binary features of the Ceiling Node. No supported flags are currently defined.
966enum xnn_status xnn_define_ceiling(
967 xnn_subgraph_t subgraph,
968 uint32_t input_id,
969 uint32_t output_id,
970 uint32_t flags);
971
Marat Dukhan52bd86f2020-02-11 18:21:51 -0800972/// Define a Clamp Node and add it to a Subgraph.
973///
974/// @param subgraph - a Subgraph object that will own the created Node.
975/// @param output_min - lower bound for clipping output values.
976/// @param output_max - upper bound for clipping output values.
977/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
978/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
979/// shape must match the shape of the input tensor.
980/// @param flags - binary features of the Clamp Node. No supported flags are currently defined.
981enum xnn_status xnn_define_clamp(
982 xnn_subgraph_t subgraph,
983 float output_min,
984 float output_max,
985 uint32_t input_id,
986 uint32_t output_id,
987 uint32_t flags);
988
Marat Dukhana1600202020-12-01 22:17:16 -0800989/// Define an ELU (Exponential Linear Unit) Node and add it to a Subgraph.
990///
991/// @param subgraph - a Subgraph object that will own the created Node.
992/// @param alpha - scale factor for negative output elements.
993/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
994/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
995/// shape must match the shape of the input tensor.
996/// @param flags - binary features of the ELU Node. No supported flags are currently defined.
997enum xnn_status xnn_define_elu(
998 xnn_subgraph_t subgraph,
999 float alpha,
1000 uint32_t input_id,
1001 uint32_t output_id,
1002 uint32_t flags);
1003
Marat Dukhan5fab4092020-06-10 01:28:28 -07001004/// Define a Floor Node and add it to a Subgraph.
1005///
1006/// @param subgraph - a Subgraph object that will own the created Node.
1007/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
1008/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1009/// shape must match the shape of the input tensor.
1010/// @param flags - binary features of the Floor Node. No supported flags are currently defined.
1011enum xnn_status xnn_define_floor(
1012 xnn_subgraph_t subgraph,
1013 uint32_t input_id,
1014 uint32_t output_id,
1015 uint32_t flags);
1016
Marat Dukhan52bd86f2020-02-11 18:21:51 -08001017/// Define a HardSwish Node and add it to a Subgraph.
1018///
1019/// @param subgraph - a Subgraph object that will own the created Node.
1020/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
1021/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1022/// shape must match the shape of the input tensor.
1023/// @param flags - binary features of the HardSwish Node. No supported flags are currently defined.
1024enum xnn_status xnn_define_hardswish(
1025 xnn_subgraph_t subgraph,
1026 uint32_t input_id,
1027 uint32_t output_id,
1028 uint32_t flags);
1029
Marat Dukhan5bbebac2020-06-10 19:42:15 -07001030/// Define a Leaky ReLU Node and add it to a Subgraph.
1031///
1032/// @param subgraph - a Subgraph object that will own the created Node.
1033/// @param negative_slope - scale factor for negative input elements.
1034/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
1035/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1036/// shape must match the shape of the input tensor.
1037/// @param flags - binary features of the Leaky ReLU Node. No supported flags are currently defined.
1038enum xnn_status xnn_define_leaky_relu(
1039 xnn_subgraph_t subgraph,
1040 float negative_slope,
1041 uint32_t input_id,
1042 uint32_t output_id,
1043 uint32_t flags);
1044
Marat Dukhan5fab4092020-06-10 01:28:28 -07001045/// Define a Negate Node and add it to a Subgraph.
1046///
1047/// @param subgraph - a Subgraph object that will own the created Node.
1048/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
1049/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1050/// shape must match the shape of the input tensor.
1051/// @param flags - binary features of the Negate Node. No supported flags are currently defined.
1052enum xnn_status xnn_define_negate(
1053 xnn_subgraph_t subgraph,
1054 uint32_t input_id,
1055 uint32_t output_id,
1056 uint32_t flags);
1057
Marat Dukhan52bd86f2020-02-11 18:21:51 -08001058/// Define a Sigmoid Node and add it to a Subgraph.
1059///
1060/// @param subgraph - a Subgraph object that will own the created Node.
1061/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
1062/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1063/// shape must match the shape of the input tensor.
1064/// @param flags - binary features of the Sigmoid Node. No supported flags are currently defined.
1065enum xnn_status xnn_define_sigmoid(
1066 xnn_subgraph_t subgraph,
1067 uint32_t input_id,
1068 uint32_t output_id,
1069 uint32_t flags);
1070
1071/// Define a SoftMax Node and add it to a Subgraph.
1072///
1073/// @param subgraph - a Subgraph object that will own the created Node.
1074/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph, and have at
1075/// least one dimension.
1076/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1077/// shape must match the shape of the input tensor.
1078/// @param flags - binary features of the SoftMax Node. No supported flags are currently defined.
1079enum xnn_status xnn_define_softmax(
1080 xnn_subgraph_t subgraph,
1081 uint32_t input_id,
1082 uint32_t output_id,
1083 uint32_t flags);
1084
Marat Dukhan5fab4092020-06-10 01:28:28 -07001085/// Define a Square Node and add it to a Subgraph.
1086///
1087/// @param subgraph - a Subgraph object that will own the created Node.
1088/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
1089/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1090/// shape must match the shape of the input tensor.
1091/// @param flags - binary features of the Square Node. No supported flags are currently defined.
1092enum xnn_status xnn_define_square(
1093 xnn_subgraph_t subgraph,
1094 uint32_t input_id,
1095 uint32_t output_id,
1096 uint32_t flags);
1097
Marat Dukhan51a01c62020-07-09 03:26:57 -07001098/// Define a Square Root Node and add it to a Subgraph.
1099///
1100/// @param subgraph - a Subgraph object that will own the created Node.
1101/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
1102/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1103/// shape must match the shape of the input tensor.
1104/// @param flags - binary features of the Square Root Node. No supported flags are currently defined.
1105enum xnn_status xnn_define_square_root(
1106 xnn_subgraph_t subgraph,
1107 uint32_t input_id,
1108 uint32_t output_id,
1109 uint32_t flags);
1110
/// Runtime is a combination of an execution plan for subgraph Nodes and a memory manager for subgraph Values.
typedef struct xnn_runtime* xnn_runtime_t;
1113
Marat Dukhan7332e832020-12-06 23:26:11 -08001114/// Create a Runtime object from a subgraph.
Marat Dukhandd2b5882020-02-06 15:12:26 -08001115///
1116/// @param subgraph - a Subgraph object with all Values and Nodes that would be handled by the runtime. No Values or
1117/// Nodes can be added to the runtime once it is constructed.
1118/// @param threadpool - the thread pool to be used for parallelisation of computations in the runtime. If the thread
1119/// pool is NULL, the computation would run on the caller thread without parallelization.
Marat Dukhan56b78a02021-06-09 01:14:12 -07001120/// @param flags - binary features of the runtime. The only currently supported values are XNN_FLAG_SPARSE_INFERENCE
1121/// and XNN_FLAG_YIELD_WORKERS. If XNN_FLAG_YIELD_WORKERS is specified, worker threads would be yielded
1122/// to the system scheduler after processing the last operator in the Runtime.
Marat Dukhandd2b5882020-02-06 15:12:26 -08001123/// @param runtime_out - pointer to the variable that will be initialized with a handle to the Runtime object upon
1124/// successful return. Once constructed, the Runtime object is independent of the Subgraph object
1125/// used to create it.
Marat Dukhan022c6592020-02-05 18:07:41 -08001126enum xnn_status xnn_create_runtime_v2(
1127 xnn_subgraph_t subgraph,
1128 pthreadpool_t threadpool,
1129 uint32_t flags,
1130 xnn_runtime_t* runtime_out);
1131
Marat Dukhandd2b5882020-02-06 15:12:26 -08001132enum xnn_status xnn_create_runtime(
1133 xnn_subgraph_t subgraph,
1134 xnn_runtime_t* runtime_out);
1135
/// Location information for one external input or output of a Runtime; an array of these is passed to
/// xnn_setup_runtime.
struct xnn_external_value {
  /// Identifier of the external value. NOTE(review): presumably the Value ID used in the Subgraph - confirm.
  uint32_t id;
  /// Pointer to the data associated with the external value.
  void* data;
};
1140
Marat Dukhandd2b5882020-02-06 15:12:26 -08001141/// Setup data pointers for external inputs and outputs in a Runtime object.
1142///
1143/// @param runtime - a Runtime object created with @ref xnn_create_runtime or @ref xnn_create_runtime_v2.
1144/// @param num_external_values - the number of external inputs and outputs specified in this call. This number must
1145/// match the number of external inputs and outputs in the runtime, i.e. all external
1146/// inputs and outputs in the runtime must be specified in one call.
1147/// @param external_values - array with location information for all external inputs and outputs in the runtime.
Marat Dukhan1d75a542020-02-03 12:23:01 -08001148enum xnn_status xnn_setup_runtime(
1149 xnn_runtime_t runtime,
1150 size_t num_external_values,
1151 const struct xnn_external_value* external_values);
1152
Marat Dukhandd2b5882020-02-06 15:12:26 -08001153/// Execute forward pass for all operators in the runtime.
1154///
1155/// @param runtime - the Runtime object with the execution plan to invoke.
Marat Dukhan1d75a542020-02-03 12:23:01 -08001156enum xnn_status xnn_invoke_runtime(
1157 xnn_runtime_t runtime);
1158
Marat Dukhandd2b5882020-02-06 15:12:26 -08001159/// Destroy a Runtime object, as well as operators and memory associated with it.
1160///
1161/// @param runtime - the Runtime object to destroy.
Marat Dukhan1d75a542020-02-03 12:23:01 -08001162enum xnn_status xnn_delete_runtime(
1163 xnn_runtime_t runtime);
1164
/// Handle to an operator object: a pointer to struct xnn_operator, whose definition is not part of this section of
/// the header.
typedef struct xnn_operator* xnn_operator_t;
1166
Marat Dukhand6209722019-10-07 12:54:25 -07001167enum xnn_status xnn_run_operator(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001168 xnn_operator_t op,
1169 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -07001170
1171enum xnn_status xnn_delete_operator(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001172 xnn_operator_t op);
Marat Dukhand6209722019-10-07 12:54:25 -07001173
#ifndef XNN_NO_F32_OPERATORS
1175
Marat Dukhan5020b962020-06-08 13:30:10 -07001176enum xnn_status xnn_create_abs_nc_f32(
1177 size_t channels,
1178 size_t input_stride,
1179 size_t output_stride,
1180 uint32_t flags,
1181 xnn_operator_t* abs_op_out);
1182
1183enum xnn_status xnn_setup_abs_nc_f32(
1184 xnn_operator_t abs_op,
1185 size_t batch_size,
1186 const float* input,
1187 float* output,
1188 pthreadpool_t threadpool);
1189
Marat Dukhanb1a0fc32019-12-02 19:32:02 -08001190enum xnn_status xnn_create_add_nd_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001191 float output_min,
1192 float output_max,
1193 uint32_t flags,
1194 xnn_operator_t* add_op_out);
Marat Dukhanb1a0fc32019-12-02 19:32:02 -08001195
1196enum xnn_status xnn_setup_add_nd_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001197 xnn_operator_t add_op,
1198 size_t num_input1_dims,
1199 const size_t* input1_shape,
1200 size_t num_input2_dims,
1201 const size_t* input2_shape,
1202 const float* input1,
1203 const float* input2,
1204 float* output,
1205 pthreadpool_t threadpool);
Marat Dukhanb1a0fc32019-12-02 19:32:02 -08001206
Marat Dukhand6209722019-10-07 12:54:25 -07001207enum xnn_status xnn_create_argmax_pooling2d_nhwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001208 uint32_t input_padding_top,
1209 uint32_t input_padding_right,
1210 uint32_t input_padding_bottom,
1211 uint32_t input_padding_left,
1212 uint32_t pooling_height,
1213 uint32_t pooling_width,
1214 size_t channels,
1215 size_t input_pixel_stride,
1216 size_t output_pixel_stride,
Marat Dukhan03bc4072020-01-28 14:52:25 -08001217 uint32_t flags,
1218 xnn_operator_t* argmax_pooling_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -07001219
1220enum xnn_status xnn_setup_argmax_pooling2d_nhwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001221 xnn_operator_t argmax_pooling_op,
1222 size_t batch_size,
1223 size_t input_height,
1224 size_t input_width,
1225 const float* input,
1226 float* output,
1227 uint32_t* index,
1228 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -07001229
1230enum xnn_status xnn_create_average_pooling2d_nhwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001231 uint32_t input_padding_top,
1232 uint32_t input_padding_right,
1233 uint32_t input_padding_bottom,
1234 uint32_t input_padding_left,
1235 uint32_t pooling_height,
1236 uint32_t pooling_width,
1237 uint32_t stride_height,
1238 uint32_t stride_width,
1239 size_t channels,
1240 size_t input_pixel_stride,
1241 size_t output_pixel_stride,
1242 float output_min,
1243 float output_max,
1244 uint32_t flags,
1245 xnn_operator_t* average_pooling_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -07001246
1247enum xnn_status xnn_setup_average_pooling2d_nhwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001248 xnn_operator_t average_pooling_op,
1249 size_t batch_size,
1250 size_t input_height,
1251 size_t input_width,
1252 const float* input,
1253 float* output,
1254 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -07001255
Marat Dukhan64e52512020-06-09 13:41:16 -07001256enum xnn_status xnn_create_bankers_rounding_nc_f32(
1257 size_t channels,
1258 size_t input_stride,
1259 size_t output_stride,
1260 uint32_t flags,
1261 xnn_operator_t* rounding_op_out);
1262
1263enum xnn_status xnn_setup_bankers_rounding_nc_f32(
1264 xnn_operator_t rounding_op,
1265 size_t batch_size,
1266 const float* input,
1267 float* output,
1268 pthreadpool_t threadpool);
1269
1270enum xnn_status xnn_create_ceiling_nc_f32(
1271 size_t channels,
1272 size_t input_stride,
1273 size_t output_stride,
1274 uint32_t flags,
1275 xnn_operator_t* ceiling_op_out);
1276
1277enum xnn_status xnn_setup_ceiling_nc_f32(
1278 xnn_operator_t ceiling_op,
1279 size_t batch_size,
1280 const float* input,
1281 float* output,
1282 pthreadpool_t threadpool);
1283
Marat Dukhand6209722019-10-07 12:54:25 -07001284enum xnn_status xnn_create_clamp_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001285 size_t channels,
1286 size_t input_stride,
1287 size_t output_stride,
1288 float output_min,
1289 float output_max,
1290 uint32_t flags,
1291 xnn_operator_t* clamp_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -07001292
1293enum xnn_status xnn_setup_clamp_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001294 xnn_operator_t clamp_op,
1295 size_t batch_size,
1296 const float* input,
1297 float* output,
1298 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -07001299
1300enum xnn_status xnn_create_convolution2d_nhwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001301 uint32_t input_padding_top,
1302 uint32_t input_padding_right,
1303 uint32_t input_padding_bottom,
1304 uint32_t input_padding_left,
1305 uint32_t kernel_height,
1306 uint32_t kernel_width,
1307 uint32_t subsampling_height,
1308 uint32_t subsampling_width,
1309 uint32_t dilation_height,
1310 uint32_t dilation_width,
1311 uint32_t groups,
1312 size_t group_input_channels,
1313 size_t group_output_channels,
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001314 size_t input_channel_stride,
1315 size_t output_channel_stride,
Marat Dukhan03bc4072020-01-28 14:52:25 -08001316 const float* kernel,
1317 const float* bias,
1318 float output_min,
1319 float output_max,
1320 uint32_t flags,
1321 xnn_operator_t* convolution_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -07001322
1323enum xnn_status xnn_setup_convolution2d_nhwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001324 xnn_operator_t convolution_op,
1325 size_t batch_size,
1326 size_t input_height,
1327 size_t input_width,
1328 const float* input,
1329 float* output,
1330 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -07001331
1332enum xnn_status xnn_create_deconvolution2d_nhwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001333 uint32_t output_padding_top,
1334 uint32_t output_padding_right,
1335 uint32_t output_padding_bottom,
1336 uint32_t output_padding_left,
1337 uint32_t kernel_height,
1338 uint32_t kernel_width,
1339 uint32_t stride_height,
1340 uint32_t stride_width,
1341 uint32_t dilation_height,
1342 uint32_t dilation_width,
1343 uint32_t groups,
1344 size_t group_input_channels,
1345 size_t group_output_channels,
1346 size_t input_pixel_stride,
1347 size_t output_pixel_stride,
1348 const float* kernel,
1349 const float* bias,
1350 float output_min,
1351 float output_max,
1352 uint32_t flags,
1353 xnn_operator_t* deconvolution_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -07001354
1355enum xnn_status xnn_setup_deconvolution2d_nhwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001356 xnn_operator_t deconvolution_op,
1357 size_t batch_size,
1358 size_t input_height,
1359 size_t input_width,
1360 uint32_t adjustment_height,
1361 uint32_t adjustment_width,
1362 const float* input,
1363 float* output,
1364 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -07001365
Marat Dukhan69180502019-12-06 15:00:31 -08001366enum xnn_status xnn_create_divide_nd_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001367 float output_min,
1368 float output_max,
1369 uint32_t flags,
1370 xnn_operator_t* divide_op_out);
Marat Dukhan69180502019-12-06 15:00:31 -08001371
1372enum xnn_status xnn_setup_divide_nd_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001373 xnn_operator_t divide_op,
1374 size_t num_input1_dims,
1375 const size_t* input1_shape,
1376 size_t num_input2_dims,
1377 const size_t* input2_shape,
1378 const float* input1,
1379 const float* input2,
1380 float* output,
1381 pthreadpool_t threadpool);
Marat Dukhan69180502019-12-06 15:00:31 -08001382
Marat Dukhanb6bd4bc2020-12-01 17:01:40 -08001383enum xnn_status xnn_create_elu_nc_f32(
1384 size_t channels,
1385 size_t input_stride,
1386 size_t output_stride,
1387 float alpha,
1388 uint32_t flags,
1389 xnn_operator_t* elu_op_out);
1390
1391enum xnn_status xnn_setup_elu_nc_f32(
1392 xnn_operator_t elu_op,
1393 size_t batch_size,
1394 const float* input,
1395 float* output,
1396 pthreadpool_t threadpool);
1397
Marat Dukhand6209722019-10-07 12:54:25 -07001398enum xnn_status xnn_create_fully_connected_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001399 size_t input_channels,
1400 size_t output_channels,
1401 size_t input_stride,
1402 size_t output_stride,
1403 const float* kernel,
1404 const float* bias,
1405 float output_min,
1406 float output_max,
1407 uint32_t flags,
1408 xnn_operator_t* fully_connected_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -07001409
1410enum xnn_status xnn_setup_fully_connected_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001411 xnn_operator_t fully_connected_op,
1412 size_t batch_size,
1413 const float* input,
1414 float* output,
1415 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -07001416
Marat Dukhan64e52512020-06-09 13:41:16 -07001417enum xnn_status xnn_create_floor_nc_f32(
1418 size_t channels,
1419 size_t input_stride,
1420 size_t output_stride,
1421 uint32_t flags,
1422 xnn_operator_t* floor_op_out);
1423
1424enum xnn_status xnn_setup_floor_nc_f32(
1425 xnn_operator_t floor_op,
1426 size_t batch_size,
1427 const float* input,
1428 float* output,
1429 pthreadpool_t threadpool);
1430
Marat Dukhand6209722019-10-07 12:54:25 -07001431enum xnn_status xnn_create_global_average_pooling_nwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001432 size_t channels,
1433 size_t input_stride,
1434 size_t output_stride,
1435 float output_min,
1436 float output_max,
1437 uint32_t flags,
1438 xnn_operator_t* global_average_pooling_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -07001439
1440enum xnn_status xnn_setup_global_average_pooling_nwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001441 xnn_operator_t global_average_pooling_op,
1442 size_t batch_size,
1443 size_t width,
1444 const float* input,
1445 float* output,
1446 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -07001447
1448enum xnn_status xnn_create_hardswish_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001449 size_t channels,
1450 size_t input_stride,
1451 size_t output_stride,
1452 uint32_t flags,
1453 xnn_operator_t* hardswish_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -07001454
1455enum xnn_status xnn_setup_hardswish_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001456 xnn_operator_t hardswish_op,
1457 size_t batch_size,
1458 const float* input,
1459 float* output,
1460 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -07001461
Marat Dukhan28813332020-06-10 18:05:38 -07001462enum xnn_status xnn_create_leaky_relu_nc_f32(
1463 size_t channels,
1464 size_t input_stride,
1465 size_t output_stride,
1466 float negative_slope,
1467 uint32_t flags,
1468 xnn_operator_t* leaky_relu_op_out);
1469
1470enum xnn_status xnn_setup_leaky_relu_nc_f32(
1471 xnn_operator_t leaky_relu_op,
1472 size_t batch_size,
1473 const float* input,
1474 float* output,
1475 pthreadpool_t threadpool);
1476
Marat Dukhand6209722019-10-07 12:54:25 -07001477enum xnn_status xnn_create_max_pooling2d_nhwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001478 uint32_t input_padding_top,
1479 uint32_t input_padding_right,
1480 uint32_t input_padding_bottom,
1481 uint32_t input_padding_left,
1482 uint32_t pooling_height,
1483 uint32_t pooling_width,
1484 uint32_t stride_height,
1485 uint32_t stride_width,
1486 uint32_t dilation_height,
1487 uint32_t dilation_width,
1488 size_t channels,
1489 size_t input_pixel_stride,
1490 size_t output_pixel_stride,
1491 float output_min,
1492 float output_max,
1493 uint32_t flags,
1494 xnn_operator_t* max_pooling_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -07001495
1496enum xnn_status xnn_setup_max_pooling2d_nhwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001497 xnn_operator_t max_pooling_op,
1498 size_t batch_size,
1499 size_t input_height,
1500 size_t input_width,
1501 const float* input,
1502 float* output,
1503 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -07001504
Marat Dukhan79e7f842019-12-05 14:35:50 -08001505enum xnn_status xnn_create_maximum_nd_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001506 uint32_t flags,
1507 xnn_operator_t* maximum_op_out);
Marat Dukhan79e7f842019-12-05 14:35:50 -08001508
1509enum xnn_status xnn_setup_maximum_nd_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001510 xnn_operator_t maximum_op,
1511 size_t num_input1_dims,
1512 const size_t* input1_shape,
1513 size_t num_input2_dims,
1514 const size_t* input2_shape,
1515 const float* input1,
1516 const float* input2,
1517 float* output,
1518 pthreadpool_t threadpool);
Marat Dukhan79e7f842019-12-05 14:35:50 -08001519
1520enum xnn_status xnn_create_minimum_nd_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001521 uint32_t flags,
1522 xnn_operator_t* minimum_op_out);
Marat Dukhan79e7f842019-12-05 14:35:50 -08001523
1524enum xnn_status xnn_setup_minimum_nd_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001525 xnn_operator_t minimum_op,
1526 size_t num_input1_dims,
1527 const size_t* input1_shape,
1528 size_t num_input2_dims,
1529 const size_t* input2_shape,
1530 const float* input1,
1531 const float* input2,
1532 float* output,
1533 pthreadpool_t threadpool);
Marat Dukhan79e7f842019-12-05 14:35:50 -08001534
Marat Dukhanca2733c2019-11-15 23:21:17 -08001535enum xnn_status xnn_create_multiply_nd_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001536 float output_min,
1537 float output_max,
1538 uint32_t flags,
1539 xnn_operator_t* multiply_op_out);
Marat Dukhanca2733c2019-11-15 23:21:17 -08001540
1541enum xnn_status xnn_setup_multiply_nd_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001542 xnn_operator_t multiply_op,
1543 size_t num_input1_dims,
1544 const size_t* input1_shape,
1545 size_t num_input2_dims,
1546 const size_t* input2_shape,
1547 const float* input1,
1548 const float* input2,
1549 float* output,
1550 pthreadpool_t threadpool);
Marat Dukhanca2733c2019-11-15 23:21:17 -08001551
Marat Dukhan5020b962020-06-08 13:30:10 -07001552enum xnn_status xnn_create_negate_nc_f32(
1553 size_t channels,
1554 size_t input_stride,
1555 size_t output_stride,
1556 uint32_t flags,
1557 xnn_operator_t* negate_op_out);
1558
1559enum xnn_status xnn_setup_negate_nc_f32(
1560 xnn_operator_t negate_op,
1561 size_t batch_size,
1562 const float* input,
1563 float* output,
1564 pthreadpool_t threadpool);
1565
Marat Dukhand6209722019-10-07 12:54:25 -07001566enum xnn_status xnn_create_prelu_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001567 size_t channels,
1568 size_t input_stride,
1569 size_t output_stride,
1570 const float* negative_slope,
Marat Dukhan03bc4072020-01-28 14:52:25 -08001571 uint32_t flags,
1572 xnn_operator_t* prelu_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -07001573
1574enum xnn_status xnn_setup_prelu_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001575 xnn_operator_t prelu_op,
1576 size_t batch_size,
1577 const float* input,
1578 float* output,
1579 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -07001580
Artsiom Ablavatski97918102020-10-27 15:52:59 -07001581enum xnn_status xnn_create_resize_bilinear2d_nchw_f32(
1582 size_t channels,
1583 size_t input_pixel_stride,
1584 size_t output_pixel_stride,
1585 uint32_t flags,
1586 xnn_operator_t* resize_op_out);
1587
1588enum xnn_status xnn_setup_resize_bilinear2d_nchw_f32(
1589 xnn_operator_t resize_op,
1590 size_t batch_size,
1591 size_t input_height,
1592 size_t input_width,
1593 size_t output_height,
1594 size_t output_width,
1595 const float* input,
1596 float* output,
1597 pthreadpool_t threadpool);
1598
Marat Dukhan69722492019-11-11 19:55:50 -08001599enum xnn_status xnn_create_resize_bilinear2d_nhwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001600 size_t channels,
1601 size_t input_pixel_stride,
1602 size_t output_pixel_stride,
1603 uint32_t flags,
1604 xnn_operator_t* resize_op_out);
Marat Dukhan69722492019-11-11 19:55:50 -08001605
1606enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001607 xnn_operator_t resize_op,
1608 size_t batch_size,
1609 size_t input_height,
1610 size_t input_width,
1611 size_t output_height,
1612 size_t output_width,
1613 const float* input,
1614 float* output,
1615 pthreadpool_t threadpool);
Marat Dukhan69722492019-11-11 19:55:50 -08001616
Marat Dukhan346a9e52019-11-15 09:06:30 -08001617enum xnn_status xnn_create_sigmoid_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001618 size_t channels,
1619 size_t input_stride,
1620 size_t output_stride,
1621 uint32_t flags,
1622 xnn_operator_t* sigmoid_op_out);
Marat Dukhan346a9e52019-11-15 09:06:30 -08001623
1624enum xnn_status xnn_setup_sigmoid_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001625 xnn_operator_t sigmoid_op,
1626 size_t batch_size,
1627 const float* input,
1628 float* output,
1629 pthreadpool_t threadpool);
Marat Dukhan346a9e52019-11-15 09:06:30 -08001630
Marat Dukhanfd8e6892020-01-27 15:25:25 -08001631enum xnn_status xnn_create_softmax_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001632 size_t channels,
1633 size_t input_stride,
1634 size_t output_stride,
1635 uint32_t flags,
1636 xnn_operator_t* softmax_op_out);
Marat Dukhan1edc4542020-01-27 12:40:13 -08001637
Marat Dukhanfd8e6892020-01-27 15:25:25 -08001638enum xnn_status xnn_setup_softmax_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001639 xnn_operator_t softmax_op,
1640 size_t batch_size,
1641 const float* input,
1642 float* output,
1643 pthreadpool_t threadpool);
Marat Dukhan1edc4542020-01-27 12:40:13 -08001644
Marat Dukhan5020b962020-06-08 13:30:10 -07001645enum xnn_status xnn_create_square_nc_f32(
1646 size_t channels,
1647 size_t input_stride,
1648 size_t output_stride,
1649 uint32_t flags,
1650 xnn_operator_t* square_op_out);
1651
1652enum xnn_status xnn_setup_square_nc_f32(
1653 xnn_operator_t square_op,
1654 size_t batch_size,
1655 const float* input,
1656 float* output,
1657 pthreadpool_t threadpool);
1658
Marat Dukhan6804bbd2020-06-30 19:26:11 -07001659enum xnn_status xnn_create_square_root_nc_f32(
1660 size_t channels,
1661 size_t input_stride,
1662 size_t output_stride,
1663 uint32_t flags,
1664 xnn_operator_t* sqrt_op_out);
1665
1666enum xnn_status xnn_setup_square_root_nc_f32(
1667 xnn_operator_t sqrt_op,
1668 size_t batch_size,
1669 const float* input,
1670 float* output,
1671 pthreadpool_t threadpool);
1672
Marat Dukhanf7399262020-06-05 10:58:44 -07001673enum xnn_status xnn_create_squared_difference_nd_f32(
1674 uint32_t flags,
1675 xnn_operator_t* squared_difference_op_out);
1676
1677enum xnn_status xnn_setup_squared_difference_nd_f32(
1678 xnn_operator_t squared_difference_op,
1679 size_t num_input1_dims,
1680 const size_t* input1_shape,
1681 size_t num_input2_dims,
1682 const size_t* input2_shape,
1683 const float* input1,
1684 const float* input2,
1685 float* output,
1686 pthreadpool_t threadpool);
1687
Marat Dukhan05f3f6d2019-12-03 15:13:53 -08001688enum xnn_status xnn_create_subtract_nd_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001689 float output_min,
1690 float output_max,
1691 uint32_t flags,
1692 xnn_operator_t* subtract_op_out);
Marat Dukhan05f3f6d2019-12-03 15:13:53 -08001693
1694enum xnn_status xnn_setup_subtract_nd_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001695 xnn_operator_t subtract_op,
1696 size_t num_input1_dims,
1697 const size_t* input1_shape,
1698 size_t num_input2_dims,
1699 const size_t* input2_shape,
1700 const float* input1,
1701 const float* input2,
1702 float* output,
1703 pthreadpool_t threadpool);
Marat Dukhan05f3f6d2019-12-03 15:13:53 -08001704
Marat Dukhan64e52512020-06-09 13:41:16 -07001705enum xnn_status xnn_create_truncation_nc_f32(
1706 size_t channels,
1707 size_t input_stride,
1708 size_t output_stride,
1709 uint32_t flags,
1710 xnn_operator_t* truncation_op_out);
1711
1712enum xnn_status xnn_setup_truncation_nc_f32(
1713 xnn_operator_t truncation_op,
1714 size_t batch_size,
1715 const float* input,
1716 float* output,
1717 pthreadpool_t threadpool);
1718
#ifndef XNN_NO_NCHW_OPERATORS
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001720
Marat Dukhanefc47b82019-11-18 09:25:38 -08001721enum xnn_status xnn_create_convolution2d_nchw_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001722 uint32_t input_padding_top,
1723 uint32_t input_padding_right,
1724 uint32_t input_padding_bottom,
1725 uint32_t input_padding_left,
1726 uint32_t kernel_height,
1727 uint32_t kernel_width,
1728 uint32_t subsampling_height,
1729 uint32_t subsampling_width,
1730 uint32_t dilation_height,
1731 uint32_t dilation_width,
1732 uint32_t groups,
1733 size_t group_input_channels,
1734 size_t group_output_channels,
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001735 size_t input_channel_stride,
1736 size_t output_channel_stride,
Marat Dukhan03bc4072020-01-28 14:52:25 -08001737 const float* kernel,
1738 const float* bias,
1739 float output_min,
1740 float output_max,
1741 uint32_t flags,
1742 xnn_operator_t* convolution_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -07001743
Marat Dukhanefc47b82019-11-18 09:25:38 -08001744enum xnn_status xnn_setup_convolution2d_nchw_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001745 xnn_operator_t convolution_op,
1746 size_t batch_size,
Marat Dukhan03bc4072020-01-28 14:52:25 -08001747 size_t input_height,
1748 size_t input_width,
1749 const float* input,
1750 float* output,
1751 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -07001752
Marat Dukhanefc47b82019-11-18 09:25:38 -08001753enum xnn_status xnn_create_global_average_pooling_ncw_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001754 size_t channels,
1755 float output_min,
1756 float output_max,
1757 uint32_t flags,
1758 xnn_operator_t* global_average_pooling_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -07001759
Marat Dukhanefc47b82019-11-18 09:25:38 -08001760enum xnn_status xnn_setup_global_average_pooling_ncw_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001761 xnn_operator_t global_average_pooling_op,
1762 size_t batch_size,
1763 size_t width,
1764 const float* input,
1765 float* output,
1766 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -07001767
#endif // XNN_NO_NCHW_OPERATORS
Marat Dukhan8fe54e42019-10-10 14:12:59 -07001769
#endif // XNN_NO_F32_OPERATORS
1771
#ifndef XNN_NO_X32_OPERATORS
1773
Marat Dukhand6209722019-10-07 12:54:25 -07001774enum xnn_status xnn_create_channel_shuffle_nc_x32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001775 size_t groups,
1776 size_t group_channels,
1777 size_t input_stride,
1778 size_t output_stride,
1779 uint32_t flags,
1780 xnn_operator_t* channel_shuffle_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -07001781
1782enum xnn_status xnn_setup_channel_shuffle_nc_x32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001783 xnn_operator_t channel_shuffle_op,
1784 size_t batch_size,
1785 const void* input,
1786 void* output,
1787 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -07001788
Marat Dukhan065b11e2020-05-22 09:49:41 -07001789enum xnn_status xnn_create_constant_pad_nd_x32(
Marat Dukhan4662b192020-05-21 15:52:03 -07001790 const void* padding_value,
1791 uint32_t flags,
Marat Dukhan065b11e2020-05-22 09:49:41 -07001792 xnn_operator_t* constant_pad_op_out);
Marat Dukhan4662b192020-05-21 15:52:03 -07001793
Marat Dukhan065b11e2020-05-22 09:49:41 -07001794enum xnn_status xnn_setup_constant_pad_nd_x32(
1795 xnn_operator_t constant_pad_op,
Marat Dukhan4662b192020-05-21 15:52:03 -07001796 size_t num_dims,
1797 const size_t* input_shape,
1798 const size_t* pre_padding,
1799 const size_t* post_padding,
1800 const void* input,
1801 void* output,
1802 pthreadpool_t threadpool);
1803
Marat Dukhan4e21b272020-06-04 18:45:01 -07001804enum xnn_status xnn_create_copy_nc_x32(
1805 size_t channels,
1806 size_t input_stride,
1807 size_t output_stride,
1808 uint32_t flags,
1809 xnn_operator_t* copy_op_out);
1810
1811enum xnn_status xnn_setup_copy_nc_x32(
1812 xnn_operator_t copy_op,
1813 size_t batch_size,
1814 const void* input,
1815 void* output,
1816 pthreadpool_t threadpool);
1817
Marat Dukhan0e521172020-11-25 13:10:04 -08001818enum xnn_status xnn_create_depth_to_space_nhwc_x32(
1819 size_t output_channels,
1820 size_t input_channel_stride,
1821 size_t output_channel_stride,
1822 uint32_t block_size,
1823 uint32_t flags,
1824 xnn_operator_t* depth_to_space_op_out);
1825
/// Setup declaration for the X32 NHWC depth-to-space operator; returns enum xnn_status.
/// Only the input spatial size (input_height, input_width) is passed; no output size parameter exists.
/// NOTE(review): presumably the output extent is derived from the block size given at creation - confirm.
1826enum xnn_status xnn_setup_depth_to_space_nhwc_x32(
1827 xnn_operator_t depth_to_space_op,
1828 size_t batch_size,
1829 size_t input_height,
1830 size_t input_width,
1831 const void* input,
1832 void* output,
1833 pthreadpool_t threadpool);
1834
/// Create declaration for the X32 depth-to-space operator whose name indicates an NCHW-to-NHWC layout change.
/// The parameter list is identical to xnn_create_depth_to_space_nhwc_x32; returns enum xnn_status.
/// NOTE(review): how the two channel strides apply to the differing input/output layouts is not stated here - confirm.
Marat Dukhanb4ac61d2020-11-12 12:08:30 -08001835enum xnn_status xnn_create_depth_to_space_nchw2nhwc_x32(
Marat Dukhanbb781b62020-11-12 13:34:05 -08001836 size_t output_channels,
Marat Dukhan9cbaa632020-11-24 21:28:50 -08001837 size_t input_channel_stride,
1838 size_t output_channel_stride,
Marat Dukhanb4ac61d2020-11-12 12:08:30 -08001839 uint32_t block_size,
1840 uint32_t flags,
1841 xnn_operator_t* depth_to_space_op_out);
1842
/// Setup declaration for the X32 NCHW-to-NHWC depth-to-space operator; returns enum xnn_status.
/// Takes the operator handle, batch size, input height and width, untyped input/output pointers and a pthreadpool_t.
/// NOTE(review): presumably input is laid out NCHW and output NHWC, as the function name suggests - confirm.
1843enum xnn_status xnn_setup_depth_to_space_nchw2nhwc_x32(
1844 xnn_operator_t depth_to_space_op,
1845 size_t batch_size,
1846 size_t input_height,
1847 size_t input_width,
Marat Dukhanb4ac61d2020-11-12 12:08:30 -08001848 const void* input,
1849 void* output,
1850 pthreadpool_t threadpool);
1851
/// Create declaration for the X32 2D unpooling operator (NHWC, per the name); returns enum xnn_status.
/// Padding is given per edge in top, right, bottom, left order, followed by the pooling window size,
/// the channel count and per-pixel input/output strides. No stride_height/stride_width parameters exist here.
Marat Dukhand6209722019-10-07 12:54:25 -07001852enum xnn_status xnn_create_unpooling2d_nhwc_x32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001853 uint32_t input_padding_top,
1854 uint32_t input_padding_right,
1855 uint32_t input_padding_bottom,
1856 uint32_t input_padding_left,
1857 uint32_t pooling_height,
1858 uint32_t pooling_width,
1859 size_t channels,
1860 size_t input_pixel_stride,
1861 size_t output_pixel_stride,
1862 uint32_t flags,
1863 xnn_operator_t* unpooling_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -07001864
/// Setup declaration for the X32 2D unpooling operator; returns enum xnn_status.
/// Besides the untyped input and output pointers it takes a read-only uint32_t index array.
/// NOTE(review): presumably index holds, per input element, the position within the pooling window to write to - confirm.
1865enum xnn_status xnn_setup_unpooling2d_nhwc_x32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001866 xnn_operator_t unpooling_op,
1867 size_t batch_size,
1868 size_t input_height,
1869 size_t input_width,
1870 const void* input,
1871 const uint32_t* index,
1872 void* output,
1873 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -07001874
1875#endif // XNN_NO_X32_OPERATORS
1876
Frank Barchard0ccccf12020-06-22 15:21:45 -07001877#ifndef XNN_NO_F16_OPERATORS
1878
/// Create declaration for the F16 N-dimensional add operator; returns enum xnn_status.
/// The output clamp bounds are passed as float, not as a half-precision type.
/// NOTE(review): how the bounds are converted for F16 data is not visible here - confirm in the implementation.
Frank Barchard01898c02020-06-23 21:49:50 -07001879enum xnn_status xnn_create_add_nd_f16(
1880 float output_min,
1881 float output_max,
1882 uint32_t flags,
1883 xnn_operator_t* add_op_out);
1884
/// Setup declaration for the F16 N-dimensional add operator; returns enum xnn_status.
/// Each input carries its own dimension count and shape array; the data pointers are untyped (void*).
/// NOTE(review): the two shapes are presumably combined by broadcasting, with no more than XNN_MAX_TENSOR_DIMS dims - confirm.
1885enum xnn_status xnn_setup_add_nd_f16(
1886 xnn_operator_t add_op,
1887 size_t num_input1_dims,
1888 const size_t* input1_shape,
1889 size_t num_input2_dims,
1890 const size_t* input2_shape,
1891 const void* input1,
1892 const void* input2,
1893 void* output,
1894 pthreadpool_t threadpool);
1895
/// Create declaration for the F16 2D convolution operator (NHWC, per the name); returns enum xnn_status.
/// Parameters run in this order: per-edge input padding (top, right, bottom, left), kernel size, subsampling, dilation,
/// group configuration, channel strides, untyped kernel and bias pointers, float clamp bounds, flags, handle pointer.
/// NOTE(review): the expected element type of kernel/bias (half vs float) is not stated in the signature - confirm.
Frank Barchard49b4dcc2020-06-26 14:07:19 -07001896enum xnn_status xnn_create_convolution2d_nhwc_f16(
1897 uint32_t input_padding_top,
1898 uint32_t input_padding_right,
1899 uint32_t input_padding_bottom,
1900 uint32_t input_padding_left,
1901 uint32_t kernel_height,
1902 uint32_t kernel_width,
1903 uint32_t subsampling_height,
1904 uint32_t subsampling_width,
1905 uint32_t dilation_height,
1906 uint32_t dilation_width,
1907 uint32_t groups,
1908 size_t group_input_channels,
1909 size_t group_output_channels,
1910 size_t input_channel_stride,
1911 size_t output_channel_stride,
1912 const void* kernel,
1913 const void* bias,
1914 float output_min,
1915 float output_max,
1916 uint32_t flags,
1917 xnn_operator_t* convolution_op_out);
1918
/// Setup declaration for the F16 2D convolution operator; returns enum xnn_status.
/// Takes the operator handle, batch size, input height and width, untyped input/output pointers and a pthreadpool_t.
/// NOTE(review): the output spatial size is not a parameter; presumably it follows from the creation-time geometry - confirm.
1919enum xnn_status xnn_setup_convolution2d_nhwc_f16(
1920 xnn_operator_t convolution_op,
1921 size_t batch_size,
1922 size_t input_height,
1923 size_t input_width,
1924 const void* input,
1925 void* output,
1926 pthreadpool_t threadpool);
1927
/// Create declaration for the F16 global average pooling operator (NWC, per the name); returns enum xnn_status.
/// Takes a channel count, input/output strides, float clamp bounds, flags and a pointer to an operator handle.
/// NOTE(review): presumably the handle is stored through global_average_pooling_op_out on success - confirm.
Frank Barchard0ccccf12020-06-22 15:21:45 -07001928enum xnn_status xnn_create_global_average_pooling_nwc_f16(
1929 size_t channels,
1930 size_t input_stride,
1931 size_t output_stride,
1932 float output_min,
1933 float output_max,
1934 uint32_t flags,
1935 xnn_operator_t* global_average_pooling_op_out);
1936
/// Setup declaration for the F16 global average pooling operator; returns enum xnn_status.
/// The only spatial parameter is width, alongside batch_size; input and output are untyped pointers.
/// NOTE(review): presumably width is the extent that gets averaged away (the W of NWC) - confirm.
1937enum xnn_status xnn_setup_global_average_pooling_nwc_f16(
1938 xnn_operator_t global_average_pooling_op,
1939 size_t batch_size,
1940 size_t width,
1941 const void* input,
1942 void* output,
1943 pthreadpool_t threadpool);
1944
/// Create declaration for the F16 hardswish operator; returns enum xnn_status.
/// Takes a channel count, input/output strides, flags and a pointer to an operator handle; there are no clamp bounds.
/// NOTE(review): presumably the handle is stored through hardswish_op_out on success - confirm.
Frank Barcharda96948e2020-09-11 15:34:18 -07001945enum xnn_status xnn_create_hardswish_nc_f16(
1946 size_t channels,
1947 size_t input_stride,
1948 size_t output_stride,
1949 uint32_t flags,
1950 xnn_operator_t* hardswish_op_out);
1951
/// Setup declaration for the F16 hardswish operator: operator handle, batch size, untyped read-only input,
/// untyped writable output and a pthreadpool_t; returns enum xnn_status.
/// NOTE(review): the pointers are void*, so the half-precision element layout is implied by the name only - confirm.
1952enum xnn_status xnn_setup_hardswish_nc_f16(
1953 xnn_operator_t hardswish_op,
1954 size_t batch_size,
1955 const void* input,
1956 void* output,
1957 pthreadpool_t threadpool);
1958
/// Create declaration for the F16 N-dimensional multiply operator; returns enum xnn_status.
/// The signature mirrors xnn_create_add_nd_f16: float clamp bounds, flags and a pointer to an operator handle.
/// NOTE(review): presumably the handle is stored through multiply_op_out on success - confirm.
Marat Dukhand04e2dd2020-09-13 21:19:39 -07001959enum xnn_status xnn_create_multiply_nd_f16(
1960 float output_min,
1961 float output_max,
1962 uint32_t flags,
1963 xnn_operator_t* multiply_op_out);
1964
/// Setup declaration for the F16 N-dimensional multiply operator; returns enum xnn_status.
/// Each input carries its own dimension count and shape array; the data pointers are untyped (void*).
/// NOTE(review): the two shapes are presumably combined by broadcasting - confirm against the implementation.
1965enum xnn_status xnn_setup_multiply_nd_f16(
1966 xnn_operator_t multiply_op,
1967 size_t num_input1_dims,
1968 const size_t* input1_shape,
1969 size_t num_input2_dims,
1970 const size_t* input2_shape,
1971 const void* input1,
1972 const void* input2,
1973 void* output,
1974 pthreadpool_t threadpool);
1975
Frank Barchard0ccccf12020-06-22 15:21:45 -07001976#endif // XNN_NO_F16_OPERATORS
1977
Marat Dukhan97262462021-06-18 16:14:17 -07001978#ifndef XNN_NO_QC8_OPERATORS
1979
/// Create declaration for the QC8 2D convolution operator (NHWC, per the name); returns enum xnn_status.
/// Geometry parameters match the other convolution creators in this header. The quantization parameters differ:
/// kernel_scale is a read-only float array here, whereas the QS8 creator takes a single float.
/// NOTE(review): presumably kernel_scale holds one scale per output channel - confirm the expected length.
1980enum xnn_status xnn_create_convolution2d_nhwc_qc8(
1981 uint32_t input_padding_top,
1982 uint32_t input_padding_right,
1983 uint32_t input_padding_bottom,
1984 uint32_t input_padding_left,
1985 uint32_t kernel_height,
1986 uint32_t kernel_width,
1987 uint32_t subsampling_height,
1988 uint32_t subsampling_width,
1989 uint32_t dilation_height,
1990 uint32_t dilation_width,
1991 uint32_t groups,
1992 size_t group_input_channels,
1993 size_t group_output_channels,
1994 size_t input_channel_stride,
1995 size_t output_channel_stride,
1996 int8_t input_zero_point,
1997 float input_scale,
1998 const float* kernel_scale,
1999 const int8_t* kernel,
2000 const int32_t* bias,
2001 int8_t output_zero_point,
2002 float output_scale,
2003 int8_t output_min,
2004 int8_t output_max,
2005 uint32_t flags,
2006 xnn_operator_t* convolution_op_out);
2007
/// Setup declaration for the QC8 2D convolution operator; returns enum xnn_status.
/// Input is a read-only int8_t pointer and output a writable int8_t pointer, sized by batch_size,
/// input_height and input_width. NOTE(review): the exact element counts required are not stated here - confirm.
2008enum xnn_status xnn_setup_convolution2d_nhwc_qc8(
2009 xnn_operator_t convolution_op,
2010 size_t batch_size,
2011 size_t input_height,
2012 size_t input_width,
2013 const int8_t* input,
2014 int8_t* output,
2015 pthreadpool_t threadpool);
2016
2017#endif // XNN_NO_QC8_OPERATORS
2018
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07002019#ifndef XNN_NO_QS8_OPERATORS
2020
/// Create declaration for the QS8 N-dimensional add operator; returns enum xnn_status.
/// Each of input1, input2 and output has its own int8_t zero point and float scale, followed by int8_t clamp bounds.
/// NOTE(review): valid ranges for the scales and the min/max ordering are not stated here - confirm.
Marat Dukhanff209482020-09-03 14:26:53 -07002021enum xnn_status xnn_create_add_nd_qs8(
2022 int8_t input1_zero_point,
2023 float input1_scale,
2024 int8_t input2_zero_point,
2025 float input2_scale,
2026 int8_t output_zero_point,
2027 float output_scale,
2028 int8_t output_min,
2029 int8_t output_max,
2030 uint32_t flags,
2031 xnn_operator_t* add_op_out);
2032
/// Setup declaration for the QS8 N-dimensional add operator; returns enum xnn_status.
/// Each input carries its own dimension count and shape array; the data pointers are int8_t.
/// NOTE(review): the two shapes are presumably combined by broadcasting - confirm against the implementation.
2033enum xnn_status xnn_setup_add_nd_qs8(
2034 xnn_operator_t add_op,
2035 size_t num_input1_dims,
2036 const size_t* input1_shape,
2037 size_t num_input2_dims,
2038 const size_t* input2_shape,
2039 const int8_t* input1,
2040 const int8_t* input2,
2041 int8_t* output,
2042 pthreadpool_t threadpool);
2043
/// Create declaration for the QS8 2D convolution operator (NHWC, per the name); returns enum xnn_status.
/// Unlike the QU8 creator there is no kernel_zero_point parameter, and kernel_scale is a single float.
/// The kernel is int8_t and the bias int32_t. NOTE(review): the expected kernel memory layout is not stated here - confirm.
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07002044enum xnn_status xnn_create_convolution2d_nhwc_qs8(
2045 uint32_t input_padding_top,
2046 uint32_t input_padding_right,
2047 uint32_t input_padding_bottom,
2048 uint32_t input_padding_left,
2049 uint32_t kernel_height,
2050 uint32_t kernel_width,
2051 uint32_t subsampling_height,
2052 uint32_t subsampling_width,
2053 uint32_t dilation_height,
2054 uint32_t dilation_width,
2055 uint32_t groups,
2056 size_t group_input_channels,
2057 size_t group_output_channels,
2058 size_t input_channel_stride,
2059 size_t output_channel_stride,
2060 int8_t input_zero_point,
2061 float input_scale,
2062 float kernel_scale,
2063 const int8_t* kernel,
2064 const int32_t* bias,
2065 int8_t output_zero_point,
2066 float output_scale,
2067 int8_t output_min,
2068 int8_t output_max,
2069 uint32_t flags,
2070 xnn_operator_t* convolution_op_out);
2071
/// Setup declaration for the QS8 2D convolution operator; returns enum xnn_status.
/// Takes the operator handle, batch size, input height and width, a read-only int8_t input,
/// a writable int8_t output and a pthreadpool_t. NOTE(review): required buffer sizes are not stated here - confirm.
2072enum xnn_status xnn_setup_convolution2d_nhwc_qs8(
2073 xnn_operator_t convolution_op,
2074 size_t batch_size,
2075 size_t input_height,
2076 size_t input_width,
2077 const int8_t* input,
2078 int8_t* output,
2079 pthreadpool_t threadpool);
2080
/// Create declaration for the QS8 2D deconvolution operator (NHWC, per the name); returns enum xnn_status.
/// Compared with the convolution creators, padding is named output_padding_*, the step is stride_* rather than
/// subsampling_*, and the strides are *_pixel_stride rather than *_channel_stride.
/// NOTE(review): whether the pixel/channel stride naming implies different units is not visible here - confirm.
Marat Dukhanbea849a2021-07-30 16:25:30 -07002081enum xnn_status xnn_create_deconvolution2d_nhwc_qs8(
2082 uint32_t output_padding_top,
2083 uint32_t output_padding_right,
2084 uint32_t output_padding_bottom,
2085 uint32_t output_padding_left,
2086 uint32_t kernel_height,
2087 uint32_t kernel_width,
2088 uint32_t stride_height,
2089 uint32_t stride_width,
2090 uint32_t dilation_height,
2091 uint32_t dilation_width,
2092 uint32_t groups,
2093 size_t group_input_channels,
2094 size_t group_output_channels,
2095 size_t input_pixel_stride,
2096 size_t output_pixel_stride,
2097 int8_t input_zero_point,
2098 float input_scale,
2099 float kernel_scale,
2100 const int8_t* kernel,
2101 const int32_t* bias,
2102 int8_t output_zero_point,
2103 float output_scale,
2104 int8_t output_min,
2105 int8_t output_max,
2106 uint32_t flags,
2107 xnn_operator_t* deconvolution_op_out);
2108
/// Setup declaration for the QS8 2D deconvolution operator; returns enum xnn_status.
/// In addition to the parameters the convolution setup takes, it accepts adjustment_height and adjustment_width.
/// NOTE(review): presumably these add extra rows/columns to the computed output size - confirm against the implementation.
2109enum xnn_status xnn_setup_deconvolution2d_nhwc_qs8(
2110 xnn_operator_t deconvolution_op,
2111 size_t batch_size,
2112 size_t input_height,
2113 size_t input_width,
2114 uint32_t adjustment_height,
2115 uint32_t adjustment_width,
2116 const int8_t* input,
2117 int8_t* output,
2118 pthreadpool_t threadpool);
2119
/// Create declaration for the QS8 fully connected operator; returns enum xnn_status.
/// Takes input/output channel counts and strides, then quantization parameters, an int8_t kernel and an int32_t bias.
/// NOTE(review): HEAD defines XNN_FLAG_TRANSPOSE_WEIGHTS for fully connected operators; presumably it is passed via flags - confirm.
Marat Dukhand23cb6e2021-04-01 01:18:58 -07002120enum xnn_status xnn_create_fully_connected_nc_qs8(
2121 size_t input_channels,
2122 size_t output_channels,
2123 size_t input_stride,
2124 size_t output_stride,
2125 int8_t input_zero_point,
2126 float input_scale,
2127 float kernel_scale,
2128 const int8_t* kernel,
2129 const int32_t* bias,
2130 int8_t output_zero_point,
2131 float output_scale,
2132 int8_t output_min,
2133 int8_t output_max,
2134 uint32_t flags,
2135 xnn_operator_t* fully_connected_op_out);
2136
/// Setup declaration for the QS8 fully connected operator: operator handle, batch size, read-only int8_t input,
/// writable int8_t output and a pthreadpool_t; returns enum xnn_status.
/// NOTE(review): required buffer sizes and whether threadpool may be NULL are not stated here - confirm.
2137enum xnn_status xnn_setup_fully_connected_nc_qs8(
2138 xnn_operator_t fully_connected_op,
2139 size_t batch_size,
2140 const int8_t* input,
2141 int8_t* output,
2142 pthreadpool_t threadpool);
2143
/// Create declaration for the QS8 global average pooling operator (NWC, per the name); returns enum xnn_status.
/// Takes a channel count and strides, separate input and output zero point/scale pairs, and int8_t clamp bounds.
/// NOTE(review): presumably the handle is stored through global_average_pooling_op_out on success - confirm.
Marat Dukhan9e0b5392020-08-07 02:29:34 -07002144enum xnn_status xnn_create_global_average_pooling_nwc_qs8(
2145 size_t channels,
2146 size_t input_stride,
2147 size_t output_stride,
2148 int8_t input_zero_point,
2149 float input_scale,
2150 int8_t output_zero_point,
2151 float output_scale,
2152 int8_t output_min,
2153 int8_t output_max,
2154 uint32_t flags,
2155 xnn_operator_t* global_average_pooling_op_out);
2156
/// Setup declaration for the QS8 global average pooling operator; returns enum xnn_status.
/// The only spatial parameter is width, alongside batch_size; input and output are int8_t pointers.
/// NOTE(review): presumably width is the extent that gets averaged away (the W of NWC) - confirm.
2157enum xnn_status xnn_setup_global_average_pooling_nwc_qs8(
2158 xnn_operator_t global_average_pooling_op,
2159 size_t batch_size,
2160 size_t width,
2161 const int8_t* input,
2162 int8_t* output,
2163 pthreadpool_t threadpool);
2164
/// Create declaration for the QS8 N-dimensional multiply operator; returns enum xnn_status.
/// The parameter list has the same shape as xnn_create_add_nd_qs8: a zero point and scale for each of
/// input1, input2 and output, int8_t clamp bounds, flags and a pointer to an operator handle.
Marat Dukhan0853b8a2021-08-03 01:01:53 -07002165enum xnn_status xnn_create_multiply_nd_qs8(
2166 int8_t input1_zero_point,
2167 float input1_scale,
2168 int8_t input2_zero_point,
2169 float input2_scale,
2170 int8_t output_zero_point,
2171 float output_scale,
2172 int8_t output_min,
2173 int8_t output_max,
2174 uint32_t flags,
2175 xnn_operator_t* multiply_op_out);
2176
/// Setup declaration for the QS8 N-dimensional multiply operator; returns enum xnn_status.
/// Each input carries its own dimension count and shape array; the data pointers are int8_t.
/// NOTE(review): the two shapes are presumably combined by broadcasting - confirm against the implementation.
2177enum xnn_status xnn_setup_multiply_nd_qs8(
2178 xnn_operator_t multiply_op,
2179 size_t num_input1_dims,
2180 const size_t* input1_shape,
2181 size_t num_input2_dims,
2182 const size_t* input2_shape,
2183 const int8_t* input1,
2184 const int8_t* input2,
2185 int8_t* output,
2186 pthreadpool_t threadpool);
2187
/// Create declaration for the QS8 N-dimensional subtract operator; returns enum xnn_status.
/// Takes a zero point and scale for each of input1, input2 and output, then int8_t clamp bounds,
/// flags and a pointer to an operator handle. NOTE(review): presumably computes input1 - input2 - confirm operand order.
Marat Dukhan8e2fd202021-09-07 18:42:01 -07002188enum xnn_status xnn_create_subtract_nd_qs8(
2189 int8_t input1_zero_point,
2190 float input1_scale,
2191 int8_t input2_zero_point,
2192 float input2_scale,
2193 int8_t output_zero_point,
2194 float output_scale,
2195 int8_t output_min,
2196 int8_t output_max,
2197 uint32_t flags,
2198 xnn_operator_t* subtract_op_out);
2199
/// Setup declaration for the QS8 N-dimensional subtract operator; returns enum xnn_status.
/// Each input carries its own dimension count and shape array; the data pointers are int8_t.
/// NOTE(review): presumably input2 is subtracted from input1, with broadcasting between the shapes - confirm.
2200enum xnn_status xnn_setup_subtract_nd_qs8(
2201 xnn_operator_t subtract_op,
2202 size_t num_input1_dims,
2203 const size_t* input1_shape,
2204 size_t num_input2_dims,
2205 const size_t* input2_shape,
2206 const int8_t* input1,
2207 const int8_t* input2,
2208 int8_t* output,
2209 pthreadpool_t threadpool);
2210
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07002211#endif // XNN_NO_QS8_OPERATORS
2212
Marat Dukhan08b7a972020-07-14 18:17:29 -07002213#ifndef XNN_NO_QU8_OPERATORS
Marat Dukhand6209722019-10-07 12:54:25 -07002214
/// Create declaration for the QU8 N-dimensional add operator; returns enum xnn_status.
/// Same parameter layout as the QS8 add creator, but zero points and clamp bounds are uint8_t.
/// NOTE(review): valid ranges for the scales and the min/max ordering are not stated here - confirm.
Marat Dukhandb007cd2021-07-20 23:42:39 -07002215enum xnn_status xnn_create_add_nd_qu8(
2216 uint8_t input1_zero_point,
2217 float input1_scale,
2218 uint8_t input2_zero_point,
2219 float input2_scale,
2220 uint8_t output_zero_point,
2221 float output_scale,
2222 uint8_t output_min,
2223 uint8_t output_max,
2224 uint32_t flags,
2225 xnn_operator_t* add_op_out);
2226
/// Setup declaration for the QU8 N-dimensional add operator; returns enum xnn_status.
/// Each input carries its own dimension count and shape array; the data pointers are uint8_t.
/// NOTE(review): the two shapes are presumably combined by broadcasting - confirm against the implementation.
2227enum xnn_status xnn_setup_add_nd_qu8(
2228 xnn_operator_t add_op,
2229 size_t num_input1_dims,
2230 const size_t* input1_shape,
2231 size_t num_input2_dims,
2232 const size_t* input2_shape,
2233 const uint8_t* input1,
2234 const uint8_t* input2,
2235 uint8_t* output,
2236 pthreadpool_t threadpool);
2237
/// Create declaration for the QU8 2D average pooling operator (NHWC, per the name); returns enum xnn_status.
/// Takes per-edge input padding (top, right, bottom, left), pooling window size, strides, channel count,
/// per-pixel strides, input/output quantization parameters and uint8_t clamp bounds. There are no dilation parameters.
Marat Dukhan08b7a972020-07-14 18:17:29 -07002238enum xnn_status xnn_create_average_pooling2d_nhwc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002239 uint32_t input_padding_top,
2240 uint32_t input_padding_right,
2241 uint32_t input_padding_bottom,
2242 uint32_t input_padding_left,
2243 uint32_t pooling_height,
2244 uint32_t pooling_width,
2245 uint32_t stride_height,
2246 uint32_t stride_width,
2247 size_t channels,
2248 size_t input_pixel_stride,
2249 size_t output_pixel_stride,
2250 uint8_t input_zero_point,
2251 float input_scale,
2252 uint8_t output_zero_point,
2253 float output_scale,
2254 uint8_t output_min,
2255 uint8_t output_max,
2256 uint32_t flags,
2257 xnn_operator_t* average_pooling_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -07002258
/// Setup declaration for the QU8 2D average pooling operator; returns enum xnn_status.
/// Takes the operator handle, batch size, input height and width, a read-only uint8_t input,
/// a writable uint8_t output and a pthreadpool_t. NOTE(review): required buffer sizes are not stated here - confirm.
Marat Dukhan08b7a972020-07-14 18:17:29 -07002259enum xnn_status xnn_setup_average_pooling2d_nhwc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002260 xnn_operator_t average_pooling_op,
2261 size_t batch_size,
2262 size_t input_height,
2263 size_t input_width,
2264 const uint8_t* input,
2265 uint8_t* output,
2266 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -07002267
/// Create declaration for the QU8 2D convolution operator (NHWC, per the name); returns enum xnn_status.
/// Unlike the QS8 and QC8 creators it takes a kernel_zero_point, and the kernel and zero points are uint8_t.
/// The bias remains int32_t. NOTE(review): the expected kernel memory layout is not stated here - confirm.
Marat Dukhan08b7a972020-07-14 18:17:29 -07002268enum xnn_status xnn_create_convolution2d_nhwc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002269 uint32_t input_padding_top,
2270 uint32_t input_padding_right,
2271 uint32_t input_padding_bottom,
2272 uint32_t input_padding_left,
2273 uint32_t kernel_height,
2274 uint32_t kernel_width,
2275 uint32_t subsampling_height,
2276 uint32_t subsampling_width,
2277 uint32_t dilation_height,
2278 uint32_t dilation_width,
2279 uint32_t groups,
2280 size_t group_input_channels,
2281 size_t group_output_channels,
Marat Dukhan08b7a972020-07-14 18:17:29 -07002282 size_t input_channel_stride,
2283 size_t output_channel_stride,
Marat Dukhan03bc4072020-01-28 14:52:25 -08002284 uint8_t input_zero_point,
2285 float input_scale,
2286 uint8_t kernel_zero_point,
2287 float kernel_scale,
2288 const uint8_t* kernel,
2289 const int32_t* bias,
2290 uint8_t output_zero_point,
2291 float output_scale,
2292 uint8_t output_min,
2293 uint8_t output_max,
2294 uint32_t flags,
2295 xnn_operator_t* convolution_op_out);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002296
/// Setup declaration for the QU8 2D convolution operator; returns enum xnn_status.
/// Takes the operator handle, batch size, input height and width, a read-only uint8_t input,
/// a writable uint8_t output and a pthreadpool_t. NOTE(review): required buffer sizes are not stated here - confirm.
Marat Dukhan08b7a972020-07-14 18:17:29 -07002297enum xnn_status xnn_setup_convolution2d_nhwc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002298 xnn_operator_t convolution_op,
2299 size_t batch_size,
2300 size_t input_height,
2301 size_t input_width,
2302 const uint8_t* input,
2303 uint8_t* output,
2304 pthreadpool_t threadpool);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002305
/// Create declaration for the QU8 2D deconvolution operator (NHWC, per the name); returns enum xnn_status.
/// Uses output_padding_*, stride_* and *_pixel_stride naming like the QS8 deconvolution creator,
/// and additionally takes a uint8_t kernel_zero_point. The kernel is uint8_t and the bias int32_t.
Marat Dukhan08b7a972020-07-14 18:17:29 -07002306enum xnn_status xnn_create_deconvolution2d_nhwc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002307 uint32_t output_padding_top,
2308 uint32_t output_padding_right,
2309 uint32_t output_padding_bottom,
2310 uint32_t output_padding_left,
2311 uint32_t kernel_height,
2312 uint32_t kernel_width,
2313 uint32_t stride_height,
2314 uint32_t stride_width,
2315 uint32_t dilation_height,
2316 uint32_t dilation_width,
2317 uint32_t groups,
2318 size_t group_input_channels,
2319 size_t group_output_channels,
2320 size_t input_pixel_stride,
2321 size_t output_pixel_stride,
2322 uint8_t input_zero_point,
2323 float input_scale,
2324 uint8_t kernel_zero_point,
2325 float kernel_scale,
2326 const uint8_t* kernel,
2327 const int32_t* bias,
2328 uint8_t output_zero_point,
2329 float output_scale,
2330 uint8_t output_min,
2331 uint8_t output_max,
2332 uint32_t flags,
2333 xnn_operator_t* deconvolution_op_out);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002334
/// Setup declaration for the QU8 2D deconvolution operator; returns enum xnn_status.
/// In addition to the parameters the convolution setup takes, it accepts adjustment_height and adjustment_width.
/// NOTE(review): presumably these add extra rows/columns to the computed output size - confirm against the implementation.
Marat Dukhan08b7a972020-07-14 18:17:29 -07002335enum xnn_status xnn_setup_deconvolution2d_nhwc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002336 xnn_operator_t deconvolution_op,
2337 size_t batch_size,
2338 size_t input_height,
2339 size_t input_width,
2340 uint32_t adjustment_height,
2341 uint32_t adjustment_width,
2342 const uint8_t* input,
2343 uint8_t* output,
2344 pthreadpool_t threadpool);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002345
/// Create declaration for the QU8 fully connected operator; returns enum xnn_status.
/// Differs from the QS8 creator by taking a uint8_t kernel_zero_point and by using uint8_t for the kernel,
/// zero points and clamp bounds. NOTE(review): presumably XNN_FLAG_TRANSPOSE_WEIGHTS is accepted via flags - confirm.
Marat Dukhan08b7a972020-07-14 18:17:29 -07002346enum xnn_status xnn_create_fully_connected_nc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002347 size_t input_channels,
2348 size_t output_channels,
2349 size_t input_stride,
2350 size_t output_stride,
2351 uint8_t input_zero_point,
2352 float input_scale,
2353 uint8_t kernel_zero_point,
2354 float kernel_scale,
2355 const uint8_t* kernel,
2356 const int32_t* bias,
2357 uint8_t output_zero_point,
2358 float output_scale,
2359 uint8_t output_min,
2360 uint8_t output_max,
2361 uint32_t flags,
2362 xnn_operator_t* fully_connected_op_out);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002363
/// Setup declaration for the QU8 fully connected operator: operator handle, batch size, read-only uint8_t input,
/// writable uint8_t output and a pthreadpool_t; returns enum xnn_status.
/// NOTE(review): required buffer sizes and whether threadpool may be NULL are not stated here - confirm.
Marat Dukhan08b7a972020-07-14 18:17:29 -07002364enum xnn_status xnn_setup_fully_connected_nc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002365 xnn_operator_t fully_connected_op,
2366 size_t batch_size,
2367 const uint8_t* input,
2368 uint8_t* output,
2369 pthreadpool_t threadpool);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002370
/// Create declaration for the QU8 global average pooling operator (NWC, per the name); returns enum xnn_status.
/// Takes a channel count and strides, separate input and output zero point/scale pairs, and uint8_t clamp bounds.
/// NOTE(review): presumably the handle is stored through global_average_pooling_op_out on success - confirm.
Marat Dukhan08b7a972020-07-14 18:17:29 -07002371enum xnn_status xnn_create_global_average_pooling_nwc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002372 size_t channels,
2373 size_t input_stride,
2374 size_t output_stride,
2375 uint8_t input_zero_point,
2376 float input_scale,
2377 uint8_t output_zero_point,
2378 float output_scale,
2379 uint8_t output_min,
2380 uint8_t output_max,
2381 uint32_t flags,
2382 xnn_operator_t* global_average_pooling_op_out);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002383
/// Setup declaration for the QU8 global average pooling operator; returns enum xnn_status.
/// The only spatial parameter is width, alongside batch_size; input and output are uint8_t pointers.
/// NOTE(review): presumably width is the extent that gets averaged away (the W of NWC) - confirm.
Marat Dukhan08b7a972020-07-14 18:17:29 -07002384enum xnn_status xnn_setup_global_average_pooling_nwc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002385 xnn_operator_t global_average_pooling_op,
2386 size_t batch_size,
2387 size_t width,
2388 const uint8_t* input,
2389 uint8_t* output,
2390 pthreadpool_t threadpool);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002391
/// Create declaration for the QU8 leaky ReLU operator; returns enum xnn_status.
/// negative_slope is a float placed between the stride parameters and the quantization parameters.
/// NOTE(review): the accepted range of negative_slope is not stated in the signature - confirm.
Marat Dukhan08b7a972020-07-14 18:17:29 -07002392enum xnn_status xnn_create_leaky_relu_nc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002393 size_t channels,
2394 size_t input_stride,
2395 size_t output_stride,
2396 float negative_slope,
2397 uint8_t input_zero_point,
2398 float input_scale,
2399 uint8_t output_zero_point,
2400 float output_scale,
2401 uint8_t output_min,
2402 uint8_t output_max,
2403 uint32_t flags,
2404 xnn_operator_t* leaky_relu_op_out);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002405
/// Setup declaration for the QU8 leaky ReLU operator: operator handle, batch size, read-only uint8_t input,
/// writable uint8_t output and a pthreadpool_t; returns enum xnn_status.
/// NOTE(review): whether input and output may alias is not stated here - confirm before relying on in-place use.
Marat Dukhan08b7a972020-07-14 18:17:29 -07002406enum xnn_status xnn_setup_leaky_relu_nc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002407 xnn_operator_t leaky_relu_op,
2408 size_t batch_size,
2409 const uint8_t* input,
2410 uint8_t* output,
2411 pthreadpool_t threadpool);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002412
/// Create declaration for the QU8 N-dimensional multiply operator; returns enum xnn_status.
/// Takes a uint8_t zero point and float scale for each of input1, input2 and output, then uint8_t clamp bounds,
/// flags and a pointer to an operator handle.
Marat Dukhan0853b8a2021-08-03 01:01:53 -07002413enum xnn_status xnn_create_multiply_nd_qu8(
2414 uint8_t input1_zero_point,
2415 float input1_scale,
2416 uint8_t input2_zero_point,
2417 float input2_scale,
2418 uint8_t output_zero_point,
2419 float output_scale,
2420 uint8_t output_min,
2421 uint8_t output_max,
2422 uint32_t flags,
2423 xnn_operator_t* multiply_op_out);
2424
/// Setup declaration for the QU8 N-dimensional multiply operator; returns enum xnn_status.
/// Each input carries its own dimension count and shape array; the data pointers are uint8_t.
/// NOTE(review): the two shapes are presumably combined by broadcasting - confirm against the implementation.
2425enum xnn_status xnn_setup_multiply_nd_qu8(
2426 xnn_operator_t multiply_op,
2427 size_t num_input1_dims,
2428 const size_t* input1_shape,
2429 size_t num_input2_dims,
2430 const size_t* input2_shape,
2431 const uint8_t* input1,
2432 const uint8_t* input2,
2433 uint8_t* output,
2434 pthreadpool_t threadpool);
2435
/// Create declaration for the QU8 sigmoid operator; returns enum xnn_status.
/// Takes a channel count and strides, input and output zero point/scale pairs, uint8_t clamp bounds,
/// flags and a pointer to an operator handle. NOTE(review): any restrictions on the output quantization are not stated here - confirm.
Marat Dukhan08b7a972020-07-14 18:17:29 -07002436enum xnn_status xnn_create_sigmoid_nc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002437 size_t channels,
2438 size_t input_stride,
2439 size_t output_stride,
2440 uint8_t input_zero_point,
2441 float input_scale,
2442 uint8_t output_zero_point,
2443 float output_scale,
2444 uint8_t output_min,
2445 uint8_t output_max,
2446 uint32_t flags,
2447 xnn_operator_t* sigmoid_op_out);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002448
/// Setup declaration for the QU8 sigmoid operator: operator handle, batch size, read-only uint8_t input,
/// writable uint8_t output and a pthreadpool_t; returns enum xnn_status.
/// NOTE(review): required buffer sizes and whether threadpool may be NULL are not stated here - confirm.
Marat Dukhan08b7a972020-07-14 18:17:29 -07002449enum xnn_status xnn_setup_sigmoid_nc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002450 xnn_operator_t sigmoid_op,
2451 size_t batch_size,
2452 const uint8_t* input,
2453 uint8_t* output,
2454 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -07002455
/// Create declaration for the QU8 softmax operator; returns enum xnn_status.
/// Unlike the other QU8 creators in this header, it has no input_zero_point and no output_min/output_max:
/// only input_scale, output_zero_point and output_scale are passed.
Marat Dukhan08b7a972020-07-14 18:17:29 -07002456enum xnn_status xnn_create_softmax_nc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002457 size_t channels,
2458 size_t input_stride,
2459 size_t output_stride,
2460 float input_scale,
2461 uint8_t output_zero_point,
2462 float output_scale,
2463 uint32_t flags,
2464 xnn_operator_t* softmax_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -07002465
/// Setup declaration for the QU8 softmax operator: operator handle, batch size, read-only uint8_t input,
/// writable uint8_t output and a pthreadpool_t; returns enum xnn_status.
/// NOTE(review): presumably the softmax is taken across the channels of each batch element - confirm.
Marat Dukhan08b7a972020-07-14 18:17:29 -07002466enum xnn_status xnn_setup_softmax_nc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002467 xnn_operator_t softmax_op,
2468 size_t batch_size,
2469 const uint8_t* input,
2470 uint8_t* output,
2471 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -07002472
/// Create declaration for the QU8 N-dimensional subtract operator; returns enum xnn_status.
/// Takes a uint8_t zero point and float scale for each of input1, input2 and output, then uint8_t clamp bounds,
/// flags and a pointer to an operator handle. NOTE(review): presumably computes input1 - input2 - confirm operand order.
Marat Dukhan8e2fd202021-09-07 18:42:01 -07002473enum xnn_status xnn_create_subtract_nd_qu8(
2474 uint8_t input1_zero_point,
2475 float input1_scale,
2476 uint8_t input2_zero_point,
2477 float input2_scale,
2478 uint8_t output_zero_point,
2479 float output_scale,
2480 uint8_t output_min,
2481 uint8_t output_max,
2482 uint32_t flags,
2483 xnn_operator_t* subtract_op_out);
2484
/// Setup declaration for the QU8 N-dimensional subtract operator; returns enum xnn_status.
/// Each input carries its own dimension count and shape array; the data pointers are uint8_t.
/// NOTE(review): presumably input2 is subtracted from input1, with broadcasting between the shapes - confirm.
2485enum xnn_status xnn_setup_subtract_nd_qu8(
2486 xnn_operator_t subtract_op,
2487 size_t num_input1_dims,
2488 const size_t* input1_shape,
2489 size_t num_input2_dims,
2490 const size_t* input2_shape,
2491 const uint8_t* input1,
2492 const uint8_t* input2,
2493 uint8_t* output,
2494 pthreadpool_t threadpool);
2495
Marat Dukhan08b7a972020-07-14 18:17:29 -07002496#endif // XNN_NO_QU8_OPERATORS
Marat Dukhand6209722019-10-07 12:54:25 -07002497
Marat Dukhan94912792021-08-16 21:40:30 -07002498#ifndef XNN_NO_S8_OPERATORS
Marat Dukhand6209722019-10-07 12:54:25 -07002499
/// Create declaration for the S8 clamp operator; returns enum xnn_status.
/// Takes a channel count, input/output strides, int8_t lower and upper bounds, flags and a pointer to an operator handle.
/// NOTE(review): behaviour when output_min exceeds output_max is not visible here - confirm.
Marat Dukhan61c0c9e2021-08-16 23:16:14 -07002500enum xnn_status xnn_create_clamp_nc_s8(
2501 size_t channels,
2502 size_t input_stride,
2503 size_t output_stride,
2504 int8_t output_min,
2505 int8_t output_max,
2506 uint32_t flags,
2507 xnn_operator_t* clamp_op_out);
2508
/// Setup declaration for the S8 clamp operator: operator handle, batch size, read-only int8_t input,
/// writable int8_t output and a pthreadpool_t; returns enum xnn_status.
/// NOTE(review): whether input and output may alias is not stated here - confirm before relying on in-place use.
2509enum xnn_status xnn_setup_clamp_nc_s8(
2510 xnn_operator_t clamp_op,
2511 size_t batch_size,
2512 const int8_t* input,
2513 int8_t* output,
2514 pthreadpool_t threadpool);
2515
/// Create declaration for the S8 2D max pooling operator (NHWC, per the name); returns enum xnn_status.
/// Takes per-edge input padding (top, right, bottom, left), pooling window size, strides and dilation,
/// then channel count, per-pixel strides and int8_t clamp bounds. No zero point or scale parameters are involved.
Marat Dukhandc5c1482021-08-16 09:03:15 -07002516enum xnn_status xnn_create_max_pooling2d_nhwc_s8(
2517 uint32_t input_padding_top,
2518 uint32_t input_padding_right,
2519 uint32_t input_padding_bottom,
2520 uint32_t input_padding_left,
2521 uint32_t pooling_height,
2522 uint32_t pooling_width,
2523 uint32_t stride_height,
2524 uint32_t stride_width,
2525 uint32_t dilation_height,
2526 uint32_t dilation_width,
2527 size_t channels,
2528 size_t input_pixel_stride,
2529 size_t output_pixel_stride,
2530 int8_t output_min,
2531 int8_t output_max,
2532 uint32_t flags,
2533 xnn_operator_t* max_pooling_op_out);
2534
/// Setup declaration for the S8 2D max pooling operator; returns enum xnn_status.
/// Takes the operator handle, batch size, input height and width, a read-only int8_t input,
/// a writable int8_t output and a pthreadpool_t. NOTE(review): required buffer sizes are not stated here - confirm.
2535enum xnn_status xnn_setup_max_pooling2d_nhwc_s8(
2536 xnn_operator_t max_pooling_op,
2537 size_t batch_size,
2538 size_t input_height,
2539 size_t input_width,
2540 const int8_t* input,
2541 int8_t* output,
2542 pthreadpool_t threadpool);
2543
Marat Dukhan94912792021-08-16 21:40:30 -07002544#endif // XNN_NO_S8_OPERATORS
Marat Dukhandc5c1482021-08-16 09:03:15 -07002545
2546#ifndef XNN_NO_U8_OPERATORS
2547
/// Create declaration for the U8 clamp operator; returns enum xnn_status.
/// Same parameter layout as the S8 clamp creator, with uint8_t lower and upper bounds.
/// NOTE(review): behaviour when output_min exceeds output_max is not visible here - confirm.
Marat Dukhand6209722019-10-07 12:54:25 -07002548enum xnn_status xnn_create_clamp_nc_u8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002549 size_t channels,
2550 size_t input_stride,
2551 size_t output_stride,
2552 uint8_t output_min,
2553 uint8_t output_max,
2554 uint32_t flags,
2555 xnn_operator_t* clamp_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -07002556
/// Setup declaration for the U8 clamp operator: operator handle, batch size, read-only uint8_t input,
/// writable uint8_t output and a pthreadpool_t; returns enum xnn_status.
/// NOTE(review): whether input and output may alias is not stated here - confirm before relying on in-place use.
2557enum xnn_status xnn_setup_clamp_nc_u8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002558 xnn_operator_t clamp_op,
2559 size_t batch_size,
2560 const uint8_t* input,
2561 uint8_t* output,
2562 pthreadpool_t threadpool);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002563
/// Create declaration for the U8 2D max pooling operator (NHWC, per the name); returns enum xnn_status.
/// Same parameter layout as the S8 max pooling creator: per-edge padding, window size, strides, dilation,
/// channel count, per-pixel strides, then clamp bounds, which here are uint8_t.
2564enum xnn_status xnn_create_max_pooling2d_nhwc_u8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002565 uint32_t input_padding_top,
2566 uint32_t input_padding_right,
2567 uint32_t input_padding_bottom,
2568 uint32_t input_padding_left,
2569 uint32_t pooling_height,
2570 uint32_t pooling_width,
2571 uint32_t stride_height,
2572 uint32_t stride_width,
2573 uint32_t dilation_height,
2574 uint32_t dilation_width,
2575 size_t channels,
2576 size_t input_pixel_stride,
2577 size_t output_pixel_stride,
2578 uint8_t output_min,
2579 uint8_t output_max,
2580 uint32_t flags,
2581 xnn_operator_t* max_pooling_op_out);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002582
/// Setup declaration for the U8 2D max pooling operator; returns enum xnn_status.
/// Takes the operator handle, batch size, input height and width, a read-only uint8_t input,
/// a writable uint8_t output and a pthreadpool_t. NOTE(review): required buffer sizes are not stated here - confirm.
2583enum xnn_status xnn_setup_max_pooling2d_nhwc_u8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002584 xnn_operator_t max_pooling_op,
2585 size_t batch_size,
2586 size_t input_height,
2587 size_t input_width,
2588 const uint8_t* input,
2589 uint8_t* output,
2590 pthreadpool_t threadpool);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002591
Marat Dukhand6209722019-10-07 12:54:25 -07002592#endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07002593
Marat Dukhand6209722019-10-07 12:54:25 -07002594#ifndef XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07002595
/// Create declaration for the X8 channel-shuffle operator; returns enum xnn_status.
/// The channel configuration is given as a group count and a per-group channel count, followed by strides,
/// flags and a pointer to an operator handle. NOTE(review): presumably total channels = groups * group_channels - confirm.
2596enum xnn_status xnn_create_channel_shuffle_nc_x8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002597 size_t groups,
2598 size_t group_channels,
2599 size_t input_stride,
2600 size_t output_stride,
2601 uint32_t flags,
2602 xnn_operator_t* channel_shuffle_op_out);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002603
/// Setup declaration for the X8 channel-shuffle operator: operator handle, batch size, untyped read-only input,
/// untyped writable output and a pthreadpool_t; returns enum xnn_status.
/// NOTE(review): buffer sizes and whether threadpool may be NULL are not stated here - confirm.
2604enum xnn_status xnn_setup_channel_shuffle_nc_x8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002605 xnn_operator_t channel_shuffle_op,
2606 size_t batch_size,
2607 const void* input,
2608 void* output,
2609 pthreadpool_t threadpool);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002610
/// Create declaration for the X8 N-dimensional constant-pad operator; returns enum xnn_status.
/// padding_value is passed as an untyped read-only pointer rather than by value.
/// NOTE(review): presumably it points at one 8-bit element and the new handle is stored through constant_pad_op_out - confirm.
Marat Dukhan139e9612021-08-09 09:03:07 -07002611enum xnn_status xnn_create_constant_pad_nd_x8(
2612 const void* padding_value,
2613 uint32_t flags,
2614 xnn_operator_t* constant_pad_op_out);
2615
/// Setup declaration for the X8 N-dimensional constant-pad operator; returns enum xnn_status.
/// The shape and both padding amounts are supplied as read-only size_t arrays alongside num_dims.
/// NOTE(review): presumably each array holds num_dims entries and num_dims must not exceed XNN_MAX_TENSOR_DIMS - confirm.
2616enum xnn_status xnn_setup_constant_pad_nd_x8(
2617 xnn_operator_t constant_pad_op,
2618 size_t num_dims,
2619 const size_t* input_shape,
2620 const size_t* pre_padding,
2621 const size_t* post_padding,
2622 const void* input,
2623 void* output,
2624 pthreadpool_t threadpool);
2625
Marat Dukhand6209722019-10-07 12:54:25 -07002626#endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07002627
2628#ifdef __cplusplus
2629} // extern "C"
2630#endif