// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

9#pragma once
10
11#include <stdbool.h>
12#include <stddef.h>
13#include <stdint.h>
14
15#include <pthreadpool.h>
16
17#ifdef __cplusplus
18extern "C" {
19#endif
20
/// The number of bytes XNNPACK may read beyond array bounds.
/// The caller must allocate at least this many extra bytes after the tensor data passed to XNNPACK.
///
/// Note: XNNPACK reads, but never writes beyond array bounds.
#define XNN_EXTRA_BYTES 16

/// Maximum number of dimensions in tensor shape.
#define XNN_MAX_TENSOR_DIMS 6

/// Allow sparse inference in a Runtime.
///
/// Note: this flag forces XNNPACK to consider sparse inference, but does not guarantee it.
#define XNN_FLAG_SPARSE_INFERENCE 0x00000001

/// Allow IEEE FP16 inference in a Runtime.
///
/// Note: this flag forces XNNPACK to consider IEEE FP16 inference, but does not guarantee it.
#define XNN_FLAG_FP16_INFERENCE 0x00000002

/// The convolution operator represents a depthwise convolution, and uses HWGo layout for filters.
#define XNN_FLAG_DEPTHWISE_CONVOLUTION 0x00000001

/// Assume transposed weights in a fully connected operator.
#define XNN_FLAG_TRANSPOSE_WEIGHTS 0x00000001

/// The operator assumes NHWC layout for the input, regardless of the output layout.
#define XNN_FLAG_INPUT_NHWC 0x00000002

/// Match "SAME" padding in TensorFlow. Exact padding values are computed dynamically depending on input size.
#define XNN_FLAG_TENSORFLOW_SAME_PADDING 0x00000004

/// Implicitly flatten and reshape input of a Fully Connected operator into a 2D tensor.
#define XNN_FLAG_TENSORFLOW_RESHAPE_2D 0x00000004

/// Match behaviour of TensorFlow 1.x.
#define XNN_FLAG_TENSORFLOW_LEGACY_MODE 0x00000004

/// Static weights of the FP16 operator are in FP32 format.
#define XNN_FLAG_FP32_STATIC_WEIGHTS 0x00000008

/// Align corners of input and output images in resize operations.
#define XNN_FLAG_ALIGN_CORNERS 0x00000008

/// Yield worker threads of the thread pool to the system scheduler after the inference.
#define XNN_FLAG_YIELD_WORKERS 0x00000010
Marat Dukhan5609a082019-10-07 10:56:58 -070067/// Status code for any XNNPACK function call.
/// Status code for any XNNPACK function call.
enum xnn_status {
  /// The call succeeded, and all output arguments now contain valid data.
  xnn_status_success = 0,
  /// The library was not initialized before the call (see xnn_initialize).
  xnn_status_uninitialized = 1,
  /// A parameter passed to the call is invalid.
  xnn_status_invalid_parameter = 2,
  /// The object the call operates on is in a state that does not permit the operation.
  xnn_status_invalid_state = 3,
  /// A parameter is valid, but its value is not supported by the implementation.
  xnn_status_unsupported_parameter = 4,
  /// The host hardware does not satisfy the minimum requirements for the call.
  xnn_status_unsupported_hardware = 5,
  /// A memory allocation failed.
  xnn_status_out_of_memory = 6,
};
78
/// Set of user-provided memory allocation callbacks, passed to xnn_initialize.
struct xnn_allocator {
  /// User-specified pointer that will be passed as-is to all functions in this structure.
  void* context;
  /// Pointer to a function to be called for general memory allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param size - The size of the memory block to allocate, in bytes.
  ///
  /// @returns Pointer to the allocated memory block of at least @ref size bytes.
  ///          If allocation fails, the function must return NULL.
  void* (*allocate)(void* context, size_t size);
  /// Pointer to a function to be called for general memory re-allocation, i.e. to increase or shrink a previously
  /// allocated memory block. The content of the old memory block is copied to the new memory block.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param pointer - Pointer to a memory block allocated by @ref allocate or @ref reallocate functions. Can be NULL.
  ///                  If the pointer is NULL, the @ref reallocate call is equivalent to an @ref allocate call.
  /// @param size - The new size of the memory block to allocate, in bytes.
  ///
  /// @returns Pointer to the newly allocated memory block of at least @ref size bytes with the content of the previous
  ///          memory block.
  ///          If allocation fails, the function must return NULL, but must not release the previous memory block.
  void* (*reallocate)(void* context, void* pointer, size_t size);
  /// Pointer to a function to be called for general memory de-allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param pointer - Pointer to a memory block allocated by @ref allocate or @ref reallocate functions. Can be NULL.
  ///                  If the pointer is NULL, the @ref deallocate call is a no-op.
  void (*deallocate)(void* context, void* pointer);
  /// Pointer to a function to be called for aligned memory allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param alignment - The alignment of the memory block to allocate, in bytes. Alignment is always a power-of-2.
  /// @param size - The size of the memory block to allocate, in bytes.
  ///
  /// @returns Pointer to the allocated memory block of at least @ref size bytes.
  ///          If allocation fails, the function must return NULL.
  void* (*aligned_allocate)(void* context, size_t alignment, size_t size);
  /// Pointer to a function to be called for aligned memory de-allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param pointer - Pointer to a memory block allocated by @ref aligned_allocate function. Can be NULL.
  ///                  If the pointer is NULL, the @ref aligned_deallocate call is a no-op.
  void (*aligned_deallocate)(void* context, void* pointer);
};
124
/// Initialize XNNPACK library.
///
/// XNNPACK must be successfully initialized before use. During initialization, XNNPACK populates internal structures
/// depending on the host processor. Initialization can be time-consuming.
///
/// @param[in] allocator - structure with function pointers to be used for memory allocation and de-allocation.
///                        If this argument is NULL, system-provided memory management functions (e.g. malloc/free)
///                        will be used.
///
/// @retval xnn_status_success - XNNPACK is successfully initialized and ready to use.
/// @retval xnn_status_out_of_memory - initialization failed due to out-of-memory condition.
/// @retval xnn_status_unsupported_hardware - initialization failed because the host processor does not satisfy the
///                                           minimum hardware requirements for XNNPACK. E.g. this may happen on x86
///                                           processors without SSE2 extension, or on 32-bit ARM processors without
///                                           the NEON SIMD extension.
enum xnn_status xnn_initialize(const struct xnn_allocator* allocator);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700141
/// Deinitialize XNNPACK library.
///
/// To avoid memory and resource leaks, users must call xnn_deinitialize once for each successful xnn_initialize call.
///
/// @retval xnn_status_success - deinitialization call succeeded.
enum xnn_status xnn_deinitialize(void);
148
/// Subgraph is an abstract representation of a neural network model.
/// Subgraph objects are used to define Values (tensors) and Nodes (operators) comprising the model.
typedef struct xnn_subgraph* xnn_subgraph_t;

/// Create an empty Subgraph object.
///
/// @param external_value_ids - number of Value IDs to reserve for communication with external graph representation.
///                             The Subgraph object would avoid creating internal Value IDs in the
///                             [0, external_value_ids-1] range.
/// @param flags - binary features of the subgraph. No supported flags are currently defined.
/// @param subgraph_out - pointer to the variable that will be initialized with a handle to the Subgraph object upon
///                       successful return.
enum xnn_status xnn_create_subgraph(
  uint32_t external_value_ids,
  uint32_t flags,
  xnn_subgraph_t* subgraph_out);

/// Destroy a Subgraph object, as well as Values, and Nodes associated with the subgraph.
///
/// @param subgraph - the Subgraph object to destroy.
enum xnn_status xnn_delete_subgraph(
  xnn_subgraph_t subgraph);

/// The Value is an external input of the Runtime: its data must be supplied by the caller.
#define XNN_VALUE_FLAG_EXTERNAL_INPUT  0x00000001
/// The Value is an external output of the Runtime: its data is produced for the caller.
#define XNN_VALUE_FLAG_EXTERNAL_OUTPUT 0x00000002

/// Sentinel Value ID: requests XNNPACK to create an internal ID for the Value.
#define XNN_INVALID_VALUE_ID UINT32_MAX
176
/// Type of elements in a Value object.
enum xnn_datatype {
  /// Invalid data type. Valid Values never have this datatype.
  xnn_datatype_invalid = 0,
  /// IEEE754 single-precision floating-point.
  xnn_datatype_fp32 = 1,
  /// IEEE754 half-precision floating-point.
  xnn_datatype_fp16 = 2,
  /// Quantized 8-bit signed integer with shared per-Value quantization parameters.
  xnn_datatype_qint8 = 3,
  /// Quantized 8-bit unsigned integer with shared per-Value quantization parameters.
  xnn_datatype_quint8 = 4,
  /// Quantized 32-bit signed integer with shared per-Value quantization parameters.
  xnn_datatype_qint32 = 5,
  /// Quantized 8-bit signed integer with shared per-channel quantization parameters.
  xnn_datatype_qcint8 = 6,
  /// Quantized 32-bit signed integer with shared per-channel quantization parameters.
  xnn_datatype_qcint32 = 7,
};
196
Marat Dukhandd2b5882020-02-06 15:12:26 -0800197/// Define a tensor-type Value and add it to a Subgraph.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800198///
Marat Dukhandd2b5882020-02-06 15:12:26 -0800199/// @param subgraph - a Subgraph object that will own the created Value.
200/// @param datatype - type of the tensor elements.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800201/// @param num_dims - number of dimensions in the shape.
202/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
Marat Dukhandd2b5882020-02-06 15:12:26 -0800203/// XNNPACK does not keep any pointers to this array after the function returns.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800204/// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized,
Marat Dukhandd2b5882020-02-06 15:12:26 -0800205/// this pointer must be is NULL. If non-NULL, the life-time of the static data must exceed the life-time
206/// of the Subgraph object, and of any Runtime objects created from the Subgraph.
207/// @param external_id - external ID for the Value. The ID must be within the range of reversed Value IDs specified on
208/// the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
209/// created for the Value.
210/// @param flags - binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
211/// and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
212/// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a
213/// valid @a external_id was provided, the variable will be initialized with the @a external_id value.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800214enum xnn_status xnn_define_tensor_value(
215 xnn_subgraph_t subgraph,
216 enum xnn_datatype datatype,
217 size_t num_dims,
218 const size_t* dims,
219 const void* data,
220 uint32_t external_id,
221 uint32_t flags,
222 uint32_t* id_out);
223
Marat Dukhan43ebc052021-03-29 17:49:52 -0700224/// Define a quantized tensor-type Value and add it to a Subgraph.
225///
226/// @param subgraph - a Subgraph object that will own the created Value.
227/// @param datatype - type of the tensor elements.
Marat Dukhan30757192021-03-29 18:19:13 -0700228/// @param zero_point - offset from zero to subtract from the quantized elements in the Value.
229/// @param scale - multiplication factor to convert quantized elements to real representation.
Marat Dukhan43ebc052021-03-29 17:49:52 -0700230/// @param num_dims - number of dimensions in the shape.
231/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
232/// XNNPACK does not keep any pointers to this array after the function returns.
233/// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized,
234/// this pointer must be is NULL. If non-NULL, the life-time of the static data must exceed the life-time
235/// of the Subgraph object, and of any Runtime objects created from the Subgraph.
236/// @param external_id - external ID for the Value. The ID must be within the range of reversed Value IDs specified on
237/// the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
238/// created for the Value.
239/// @param flags - binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
240/// and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
241/// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a
242/// valid @a external_id was provided, the variable will be initialized with the @a external_id value.
243enum xnn_status xnn_define_quantized_tensor_value(
244 xnn_subgraph_t subgraph,
245 enum xnn_datatype datatype,
246 int32_t zero_point,
247 float scale,
248 size_t num_dims,
249 const size_t* dims,
250 const void* data,
251 uint32_t external_id,
252 uint32_t flags,
253 uint32_t* id_out);
254
Marat Dukhana11a1e82021-06-24 13:10:13 -0700255/// Define a channelwise quantized tensor-type Value and add it to a Subgraph.
256///
257/// @param subgraph - a Subgraph object that will own the created Value.
258/// @param datatype - type of the tensor elements.
259/// @param scale - per-channel multiplication factors to convert quantized elements to real representation.
260/// @param num_dims - number of dimensions in the shape.
261/// @param channel_dim - index of the channel dimension in the tensor with per-channel quantization parameters.
262/// Typically this is the first dimension (dimension #0) of the filter tensors in the Convolution,
263/// Deconvolution, and Fully Connected operators and the last dimension of the filter tensors in
264/// the Depthwise Convolution operators.
265/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
266/// XNNPACK does not keep any pointers to this array after the function returns.
267/// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized,
268/// this pointer must be is NULL. If non-NULL, the life-time of the static data must exceed the life-time
269/// of the Subgraph object, and of any Runtime objects created from the Subgraph.
270/// @param external_id - external ID for the Value. The ID must be within the range of reversed Value IDs specified on
271/// the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
272/// created for the Value.
273/// @param flags - binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
274/// and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
275/// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a
276/// valid @a external_id was provided, the variable will be initialized with the @a external_id value.
277enum xnn_status xnn_define_channelwise_quantized_tensor_value(
278 xnn_subgraph_t subgraph,
279 enum xnn_datatype datatype,
280 const float* scale,
281 size_t num_dims,
282 size_t channel_dim,
283 const size_t* dims,
284 const void* data,
285 uint32_t external_id,
286 uint32_t flags,
287 uint32_t* id_out);
288
Marat Dukhan20483c72021-12-05 09:56:40 -0800289/// Define a Convert Node and add it to a Subgraph.
290///
291/// @param subgraph - a Subgraph object that will own the created Node.
292/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
293/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
294/// shape must match the shape of the input tensor.
295/// @param flags - binary features of the Convert Node. No supported flags are currently defined.
296enum xnn_status xnn_define_convert(
297 xnn_subgraph_t subgraph,
298 uint32_t input_id,
299 uint32_t output_id,
300 uint32_t flags);
301
Marat Dukhandd2b5882020-02-06 15:12:26 -0800302/// Define a 2D Convolution Node and add it to a Subgraph.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800303///
Marat Dukhandd2b5882020-02-06 15:12:26 -0800304/// @param subgraph - a Subgraph object that will own the created Node.
Marat Dukhan15d1f512020-02-24 08:06:33 -0800305/// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING
306/// flag is specified.
307/// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if
308/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
309/// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if
310/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
311/// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if
312/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800313/// @param kernel_height - kernel (filter) height.
314/// @param kernel_width - kernel (filter) width.
315/// @param subsampling_height - height of subsampling region for convolution output (convolution height stride).
316/// @param subsampling_width - width of subsampling region for convolution output (convolution width stride).
317/// @param dilation_height - dilation of kernel elements along the height dimension.
318/// @param dilation_width - dilation of kernel elements along the width dimension.
319/// @param groups - number of convolution groups.
320/// @param group_input_channels - number of input channels per group.
321/// @param group_output_channels - number of output channels per group.
322/// @param output_min - lower bound for clipping output values.
323/// @param output_max - upper bound for clipping output values.
Marat Dukhandd2b5882020-02-06 15:12:26 -0800324/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
325/// with [N, IH, IW, groups * group_input_channels] dimensions
326/// @param filter_id - Value ID for the filter tensor. The filter tensor must ge a 4D tensor defined in the @a subgraph
327/// with [groups * group_output_channels, kernel_height, kernel_width, group_input_channels]
328/// dimensions.
329/// @param bias_id - Value ID for the bias tensor. The bias tensor must be a 1D tensor defined in the @a subgraph with
330/// [groups * group_output_channels] dimensions.
331/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
332/// with [N, OH, OW, groups * group_output_channels] dimensions.
333/// @param flags - binary features of the 2D Convolution Node. The only currently supported values is
334/// XNN_FLAG_TENSORFLOW_SAME_PADDING.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800335enum xnn_status xnn_define_convolution_2d(
336 xnn_subgraph_t subgraph,
337 uint32_t input_padding_top,
338 uint32_t input_padding_right,
339 uint32_t input_padding_bottom,
340 uint32_t input_padding_left,
341 uint32_t kernel_height,
342 uint32_t kernel_width,
343 uint32_t subsampling_height,
344 uint32_t subsampling_width,
345 uint32_t dilation_height,
346 uint32_t dilation_width,
347 uint32_t groups,
348 size_t group_input_channels,
349 size_t group_output_channels,
350 float output_min,
351 float output_max,
352 uint32_t input_id,
353 uint32_t filter_id,
354 uint32_t bias_id,
355 uint32_t output_id,
356 uint32_t flags);
357
Marat Dukhanf5870842020-04-27 18:19:54 -0700358/// Define a 2D Deconvolution (Transposed Convolution) Node and add it to a Subgraph.
359///
360/// @param subgraph - a Subgraph object that will own the created Node.
361/// @param padding_top - implicit padding above 2D output data.
362/// @param padding_right - implicit padding to the right of 2D output data.
363/// @param padding_bottom - implicit padding below 2D output data.
364/// @param padding_left - implicit padding to the left of 2D output data.
365/// @param adjustment_height - additional elements in the bottom of the 2D output data.
366/// @param adjustment_width - additional elements to the right of the 2D output data.
367/// @param kernel_height - kernel (filter) height.
368/// @param kernel_width - kernel (filter) width.
369/// @param upsampling_height - height of upsampling region for deconvolution input (deconvolution height stride).
370/// @param upsampling_width - width of upsampling region for deconvolution input (deconvolution width stride).
371/// @param dilation_height - dilation of kernel elements along the height dimension.
372/// @param dilation_width - dilation of kernel elements along the width dimension.
373/// @param groups - number of convolution groups.
374/// @param group_input_channels - number of input channels per group.
375/// @param group_output_channels - number of output channels per group.
376/// @param output_min - lower bound for clipping output values.
377/// @param output_max - upper bound for clipping output values.
378/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
379/// with [N, IH, IW, groups * group_input_channels] dimensions
380/// @param filter_id - Value ID for the filter tensor. The filter tensor must ge a 4D tensor defined in the @a subgraph
381/// with [groups * group_output_channels, kernel_height, kernel_width, group_input_channels]
382/// dimensions.
Marat Dukhana9992252021-04-15 16:47:24 -0700383/// @param bias_id - Value ID for the bias tensor, or XNN_INVALID_VALUE_ID for a 2D Convolution Node without a bias. If
384/// present, the bias tensor must be a 1D tensor defined in the @a subgraph with
Marat Dukhanf5870842020-04-27 18:19:54 -0700385/// [groups * group_output_channels] dimensions.
386/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
387/// with [N, OH, OW, groups * group_output_channels] dimensions.
388/// @param flags - binary features of the 2D Deconvolution Node. No supported flags are currently defined.
389enum xnn_status xnn_define_deconvolution_2d(
390 xnn_subgraph_t subgraph,
391 uint32_t padding_top,
392 uint32_t padding_right,
393 uint32_t padding_bottom,
394 uint32_t padding_left,
395 uint32_t adjustment_height,
396 uint32_t adjustment_width,
397 uint32_t kernel_height,
398 uint32_t kernel_width,
399 uint32_t upsampling_height,
400 uint32_t upsampling_width,
401 uint32_t dilation_height,
402 uint32_t dilation_width,
403 uint32_t groups,
404 size_t group_input_channels,
405 size_t group_output_channels,
406 float output_min,
407 float output_max,
408 uint32_t input_id,
409 uint32_t filter_id,
410 uint32_t bias_id,
411 uint32_t output_id,
412 uint32_t flags);
413
Marat Dukhandd2b5882020-02-06 15:12:26 -0800414/// Define a 2D Depthwise Convolution Node and add it to a Subgraph.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800415///
Marat Dukhandd2b5882020-02-06 15:12:26 -0800416/// @param subgraph - a Subgraph object that will own the created Node.
Marat Dukhan15d1f512020-02-24 08:06:33 -0800417/// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING
418/// flag is specified.
419/// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if
420/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
421/// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if
422/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
423/// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if
424/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800425/// @param kernel_height - kernel (filter) height.
426/// @param kernel_width - kernel (filter) width.
427/// @param subsampling_height - height of subsampling region for convolution output (convolution height stride).
428/// @param subsampling_width - width of subsampling region for convolution output (convolution width stride).
429/// @param dilation_height - dilation of kernel elements along the height dimension.
430/// @param dilation_width - dilation of kernel elements along the width dimension.
431/// @param depth_multiplier - ratio of output channels to input channels.
432/// @param input_channels - number of input channels.
433/// @param output_min - lower bound for clipping output values.
434/// @param output_max - upper bound for clipping output values.
Marat Dukhandd2b5882020-02-06 15:12:26 -0800435/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
436/// with [N, IH, IW, input_channels] dimensions
437/// @param filter_id - Value ID for the filter tensor. The filter tensor must ge a 4D tensor defined in the @a subgraph
438/// with [1, kernel_height, kernel_width, input_channels * depth_multiplier] dimensions.
Marat Dukhana9992252021-04-15 16:47:24 -0700439/// @param bias_id - Value ID for the bias tensor, or XNN_INVALID_VALUE_ID for a 2D Depthwise Convolution Node without
440/// a bias. If present, the bias tensor must be a 1D tensor defined in the @a subgraph with
Marat Dukhandd2b5882020-02-06 15:12:26 -0800441/// [input_channels * depth_multiplier] dimensions.
442/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
443/// with [N, OH, OW, input_channels * depth_multiplier] dimensions.
444/// @param flags - binary features of the 2D Depthwise Convolution Node. The only currently supported values is
445/// XNN_FLAG_TENSORFLOW_SAME_PADDING.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800446enum xnn_status xnn_define_depthwise_convolution_2d(
447 xnn_subgraph_t subgraph,
448 uint32_t input_padding_top,
449 uint32_t input_padding_right,
450 uint32_t input_padding_bottom,
451 uint32_t input_padding_left,
452 uint32_t kernel_height,
453 uint32_t kernel_width,
454 uint32_t subsampling_height,
455 uint32_t subsampling_width,
456 uint32_t dilation_height,
457 uint32_t dilation_width,
458 uint32_t depth_multiplier,
459 size_t input_channels,
460 float output_min,
461 float output_max,
462 uint32_t input_id,
463 uint32_t filter_id,
464 uint32_t bias_id,
465 uint32_t output_id,
466 uint32_t flags);
467
Marat Dukhan853bb7a2021-04-15 15:52:25 -0700468/// Define a Depth To Space Node and add it to a Subgraph.
Artsiom Ablavatskibbe85062020-11-05 14:07:37 -0800469///
Marat Dukhan853bb7a2021-04-15 15:52:25 -0700470/// The Depth To Space Node rearranges data from depth into blocks of spatial data (a reverse transform to
471/// Space To Depth). For a given input pixel, an output square of pixels with side @a block_size is formed from values
472/// in the corresponding number of its channels. The output depth is therefore @a block_size x @a block_size times
473/// smaller than that of the input.
Artsiom Ablavatskibbe85062020-11-05 14:07:37 -0800474///
475/// @param subgraph - a Subgraph object that will own the created Node.
Marat Dukhan853bb7a2021-04-15 15:52:25 -0700476/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
477/// with [N, IH, IW, OC * block_size * block_size] dimensions.
478/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
479/// with [N, IH * block_size, IW * block_size, OC] dimensions.
Artsiom Ablavatskibbe85062020-11-05 14:07:37 -0800480/// @param block_size - the size of the spatial block.
Marat Dukhan853bb7a2021-04-15 15:52:25 -0700481/// @param flags - binary features of the input_channels Node. No supported flags are currently defined.
Artsiom Ablavatskibbe85062020-11-05 14:07:37 -0800482enum xnn_status xnn_define_depth_to_space(
483 xnn_subgraph_t subgraph,
484 uint32_t input_id,
485 uint32_t output_id,
486 uint32_t block_size,
487 uint32_t flags);
488
Marat Dukhana059b7d2020-06-11 11:41:27 -0700489/// Define a 2D Global Average Pooling Node and add it to a Subgraph.
490///
491/// @param subgraph - a Subgraph object that will own the created Node.
492/// @param output_min - lower bound for clipping output values.
493/// @param output_max - upper bound for clipping output values.
Marat Dukhan853bb7a2021-04-15 15:52:25 -0700494/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
495/// with [N, H, W, C] dimensions
496/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
497/// with [N, 1, 1, C] dimensions.
498/// @param flags - binary features of the 2D Global Average Pooling Node. No supported flags are currently defined.
Marat Dukhana059b7d2020-06-11 11:41:27 -0700499enum xnn_status xnn_define_global_average_pooling_2d(
500 xnn_subgraph_t subgraph,
501 float output_min,
502 float output_max,
503 uint32_t input_id,
504 uint32_t output_id,
505 uint32_t flags);
506
Marat Dukhan21d3bd62020-02-29 00:39:39 -0800507/// Define a 2D Average Pooling Node and add it to a Subgraph.
508///
509/// @param subgraph - a Subgraph object that will own the created Node.
510/// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING
511/// flag is specified.
512/// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if
513/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
514/// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if
515/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
516/// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if
517/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
518/// @param pooling_height - pooling (kernel) height.
519/// @param pooling_width - pooling (kernel) width.
520/// @param stride_height - displacement of the pooling window in the vertical dimension of the input pixels corresponding
521/// to vertically adjacent output pixels.
522/// @param stride_width - displacement of the pooling window in the horizontal dimension of the input pixels corresponding
523/// to horizontally adjacent output pixels.
524/// @param output_min - lower bound for clipping output values.
525/// @param output_max - upper bound for clipping output values.
526/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
527/// with [N, IH, IW, channels] dimensions
528/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
529/// with [N, OH, OW, channels] dimensions.
530/// @param flags - binary features of the 2D Average Pooling Node. The only currently supported value is
531/// XNN_FLAG_TENSORFLOW_SAME_PADDING.
532enum xnn_status xnn_define_average_pooling_2d(
533 xnn_subgraph_t subgraph,
534 uint32_t input_padding_top,
535 uint32_t input_padding_right,
536 uint32_t input_padding_bottom,
537 uint32_t input_padding_left,
538 uint32_t pooling_height,
539 uint32_t pooling_width,
540 uint32_t stride_height,
541 uint32_t stride_width,
542 float output_min,
543 float output_max,
544 uint32_t input_id,
545 uint32_t output_id,
546 uint32_t flags);
547
Marat Dukhan38c07ec2020-04-23 16:44:32 -0700548/// Define a Fully Connected Node and add it to a Subgraph.
549///
550/// @param subgraph - a Subgraph object that will own the created Node.
551/// @param output_min - lower bound for clipping output values.
552/// @param output_max - upper bound for clipping output values.
Marat Dukhan853bb7a2021-04-15 15:52:25 -0700553/// @param input_id - Value ID for the input tensor. The input tensor must be an N-dimensional tensor defined in the
554/// @a subgraph. If XNN_FLAG_TENSORFLOW_RESHAPE_2D is not specified, the input tensor must be at least
555/// 1D and its last dimension must match the last dimension of the filter tensor. In particular, if
556/// input is a 2D tensor, it must have [batch_size, input_channels] dimensions.
557/// If XNN_FLAG_TENSORFLOW_RESHAPE_2D is specified, the number of elements in the input tensor must be
558/// divisible by the input_channels. The tensor will be first flattened into a 1D tensor of
559/// [num_input_elements] dimensions, then reshaped into a 2D tensor of
560/// [num_input_elements / input_channels, input_channels] dimensions where num_input_elements is the
561/// total number of elements in the input tensor.
562/// @param filter_id - Value ID for the filter tensor. The filter tensor must a 2D tensor defined in the @a subgraph.
563/// If the XNN_FLAG_TRANSPOSE_WEIGHTS flag is not specified, the filter tensor must have
564/// [output_channels, input_channels] dimensions. If the XNN_FLAG_TRANSPOSE_WEIGHTS flag is
565/// specified, the filter tensor must have [input_channels, output_channels] dimensions.
566/// @param bias_id - Value ID for the bias tensor, or XNN_INVALID_VALUE_ID for a Fully Connected Node without a bias.
567/// If present, the bias tensor must be a 1D tensor defined in the @a subgraph with [output_channels]
568/// dimensions.
569/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph.
570/// If XNN_FLAG_TENSORFLOW_RESHAPE_2D is not specified, the output tensor must have the same
571/// dimensionality as the input tensor, all its dimensions but the last one must match the
572/// corresponding dimensions of the input tensor, and the last dimension of the output tensor must
573/// match the first dimension of the filter tensor. In particular, if input is a 2D tensor, output
574/// must be a 2D tensor of [batch_size, output_channels] dimensions.
575/// If XNN_FLAG_TENSORFLOW_RESHAPE_2D is specified, output must be a 2D tensor of
576/// [num_input_elements / input_channels, output_channels] dimensions where num_input_elements is the
577/// total number of elements in the input tensor.
578/// @param flags - binary features of the Fully Connected Node. The only currently supported values are
579/// XNN_FLAG_TENSORFLOW_RESHAPE_2D and XNN_FLAG_TRANSPOSE_WEIGHTS.
Marat Dukhana9992252021-04-15 16:47:24 -0700580enum xnn_status xnn_define_fully_connected(
581 xnn_subgraph_t subgraph,
582 float output_min,
583 float output_max,
584 uint32_t input_id,
585 uint32_t filter_id,
586 uint32_t bias_id,
587 uint32_t output_id,
588 uint32_t flags);
Marat Dukhan38c07ec2020-04-23 16:44:32 -0700589
Marat Dukhan21d3bd62020-02-29 00:39:39 -0800590/// Define a 2D Max Pooling Node and add it to a Subgraph.
591///
592/// @param subgraph - a Subgraph object that will own the created Node.
593/// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING
594/// flag is specified.
595/// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if
596/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
597/// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if
598/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
599/// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if
600/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
601/// @param pooling_height - pooling (kernel) height.
602/// @param pooling_width - pooling (kernel) width.
603/// @param stride_height - displacement of the pooling window in the vertical dimension of the input pixels corresponding
604/// to vertically adjacent output pixels.
605/// @param stride_width - displacement of the pooling window in the horizontal dimension of the input pixels corresponding
606/// to horizontally adjacent output pixels.
607/// @param dilation_height - dilation of pooling elements along the height dimension.
608/// @param dilation_width - dilation of pooling elements along the width dimension.
609/// @param output_min - lower bound for clipping output values.
610/// @param output_max - upper bound for clipping output values.
611/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
612/// with [N, IH, IW, channels] dimensions
613/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
614/// with [N, OH, OW, channels] dimensions.
615/// @param flags - binary features of the 2D Max Pooling Node. The only currently supported value is
616/// XNN_FLAG_TENSORFLOW_SAME_PADDING.
617enum xnn_status xnn_define_max_pooling_2d(
618 xnn_subgraph_t subgraph,
619 uint32_t input_padding_top,
620 uint32_t input_padding_right,
621 uint32_t input_padding_bottom,
622 uint32_t input_padding_left,
623 uint32_t pooling_height,
624 uint32_t pooling_width,
625 uint32_t stride_height,
626 uint32_t stride_width,
627 uint32_t dilation_height,
628 uint32_t dilation_width,
629 float output_min,
630 float output_max,
631 uint32_t input_id,
632 uint32_t output_id,
633 uint32_t flags);
634
Marat Dukhan5cb16e72020-05-05 16:41:57 -0700635/// Define a 2D ArgMax Pooling Node and add it to a Subgraph.
636///
637/// @param subgraph - a Subgraph object that will own the created Node.
638/// @param input_padding_top - implicit zero-padding above 2D input data.
639/// @param input_padding_right - implicit zero-padding to the right of 2D input data.
640/// @param input_padding_bottom - implicit zero-padding below 2D input data.
641/// @param input_padding_left - implicit zero-padding to the left of 2D input data.
642/// @param pooling_height - pooling (kernel) height. Vertical stride between pooling regions match this value.
643/// @param pooling_width - pooling (kernel) width. Horizontal stride between pooling regions match this value.
644/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
645/// with [N, IH, IW, channels] dimensions
646/// @param output_value_id - Value ID for the output tensor with the maximum values in the pools. The output tensor must
647/// be a 4D tensor defined in the @a subgraph with [N, OH, OW, channels] dimensions.
648/// @param output_index_id - Value ID for the output tensor with the indexes of the maximum values in the pools. The
649/// output tensor must be a 4D tensor defined in the @a subgraph with [N, OH, OW, channels]
650/// dimensions.
651/// @param flags - binary features of the 2D ArgMax Pooling Node. No supported flags are currently defined.
652enum xnn_status xnn_define_argmax_pooling_2d(
653 xnn_subgraph_t subgraph,
654 uint32_t input_padding_top,
655 uint32_t input_padding_right,
656 uint32_t input_padding_bottom,
657 uint32_t input_padding_left,
658 uint32_t pooling_height,
659 uint32_t pooling_width,
660 uint32_t input_id,
661 uint32_t output_value_id,
662 uint32_t output_index_id,
663 uint32_t flags);
664
665/// Define a 2D UnPooling Node and add it to a Subgraph.
666///
667/// @param subgraph - a Subgraph object that will own the created Node.
668/// @param padding_top - implicit padding above 2D output data.
669/// @param padding_right - implicit padding to the right of 2D output data.
670/// @param padding_bottom - implicit padding below 2D output data.
671/// @param padding_left - implicit padding to the left of 2D output data.
672/// @param pooling_height - height of the pooling window.
673/// @param pooling_width - width of the pooling window.
674/// @param input_value_id - Value ID for the input tensor with the max-pooling values to invert. The input value tensor
675/// must be a 4D tensor defined in the @a subgraph with [N, IH, IW, channels] dimensions.
676/// @param input_index_id - Value ID for the input tensor with the indices of the per-pool maximum values produced by
677/// a 2D ArgMax Pooling Node. The input tensor must be a 4D tensor defined in the @a subgraph with
678/// [N, IH, IW, channels] dimensions.
679/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
680/// with [N, OH, OW, channels] dimensions.
681/// @param flags - binary features of the 2D UnPooling Node. No supported flags are currently defined.
682enum xnn_status xnn_define_unpooling_2d(
683 xnn_subgraph_t subgraph,
684 uint32_t padding_top,
685 uint32_t padding_right,
686 uint32_t padding_bottom,
687 uint32_t padding_left,
688 uint32_t pooling_height,
689 uint32_t pooling_width,
690 uint32_t input_value_id,
691 uint32_t input_index_id,
692 uint32_t output_id,
693 uint32_t flags);
694
Marat Dukhan54dcb462020-02-10 11:06:12 -0800695/// Define a 2-Input Add Node and add it to a Subgraph.
696///
697/// The 2-Input Add Node computes elementwise addition of two tensor inputs with numpy broadcasting rules.
698///
699/// @param subgraph - a Subgraph object that will own the created Node.
700/// @param output_min - lower bound for clipping output values.
701/// @param output_max - upper bound for clipping output values.
702/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
703/// the @a subgraph with each dimension either equal to the corresponding dimension of the second
704/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
705/// that dimension.
706/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
707/// the @a subgraph with each dimension either equal to the corresponding dimension of the first
708/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
709/// that dimension.
710/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
711/// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
712/// of the two inputs.
713/// @param flags - binary features of the Add Node. No supported flags are currently defined.
714enum xnn_status xnn_define_add2(
715 xnn_subgraph_t subgraph,
716 float output_min,
717 float output_max,
718 uint32_t input1_id,
719 uint32_t input2_id,
720 uint32_t output_id,
721 uint32_t flags);
722
723/// Define a 2-Input Multiply Node and add it to a Subgraph.
724///
725/// The 2-Input Multiply Node computes elementwise multiplication of two tensor inputs with numpy broadcasting rules.
726///
727/// @param subgraph - a Subgraph object that will own the created Node.
728/// @param output_min - lower bound for clipping output values.
729/// @param output_max - upper bound for clipping output values.
730/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
731/// the @a subgraph with each dimension either equal to the corresponding dimension of the second
732/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
733/// that dimension.
734/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
735/// the @a subgraph with each dimension either equal to the corresponding dimension of the first
736/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
737/// that dimension.
738/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
739/// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
740/// of the two inputs.
741/// @param flags - binary features of the Multiply Node. No supported flags are currently defined.
742enum xnn_status xnn_define_multiply2(
743 xnn_subgraph_t subgraph,
744 float output_min,
745 float output_max,
746 uint32_t input1_id,
747 uint32_t input2_id,
748 uint32_t output_id,
749 uint32_t flags);
750
Marat Dukhan9d3a4592020-06-05 16:52:42 -0700751/// Define a Subtract Node and add it to a Subgraph.
752///
753/// The Subtract Node computes elementwise subtraction of two tensor inputs with numpy broadcasting rules.
754///
755/// @param subgraph - a Subgraph object that will own the created Node.
756/// @param output_min - lower bound for clipping output values.
757/// @param output_max - upper bound for clipping output values.
758/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
759/// the @a subgraph with each dimension either equal to the corresponding dimension of the second
760/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
761/// that dimension.
762/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
763/// the @a subgraph with each dimension either equal to the corresponding dimension of the first
764/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
765/// that dimension.
766/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
767/// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
768/// of the two inputs.
769/// @param flags - binary features of the Subtract Node. No supported flags are currently defined.
770enum xnn_status xnn_define_subtract(
771 xnn_subgraph_t subgraph,
772 float output_min,
773 float output_max,
774 uint32_t input1_id,
775 uint32_t input2_id,
776 uint32_t output_id,
777 uint32_t flags);
778
779/// Define a Divide Node and add it to a Subgraph.
780///
781/// The Divide Node computes elementwise division of two tensor inputs with numpy broadcasting rules.
782///
783/// @param subgraph - a Subgraph object that will own the created Node.
784/// @param output_min - lower bound for clipping output values.
785/// @param output_max - upper bound for clipping output values.
786/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
787/// the @a subgraph with each dimension either equal to the corresponding dimension of the second
788/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
789/// that dimension.
790/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
791/// the @a subgraph with each dimension either equal to the corresponding dimension of the first
792/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
793/// that dimension.
794/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
795/// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
796/// of the two inputs.
797/// @param flags - binary features of the Divide Node. No supported flags are currently defined.
798enum xnn_status xnn_define_divide(
799 xnn_subgraph_t subgraph,
800 float output_min,
801 float output_max,
802 uint32_t input1_id,
803 uint32_t input2_id,
804 uint32_t output_id,
805 uint32_t flags);
806
807/// Define a 2-Input Maximum Node and add it to a Subgraph.
808///
809/// The 2-Input Maximum Node computes elementwise maximum of two tensor inputs with numpy broadcasting rules.
810///
811/// @param subgraph - a Subgraph object that will own the created Node.
812/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
813/// the @a subgraph with each dimension either equal to the corresponding dimension of the second
814/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
815/// that dimension.
816/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
817/// the @a subgraph with each dimension either equal to the corresponding dimension of the first
818/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
819/// that dimension.
820/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
821/// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
822/// of the two inputs.
823/// @param flags - binary features of the Maximum Node. No supported flags are currently defined.
824enum xnn_status xnn_define_maximum2(
825 xnn_subgraph_t subgraph,
826 uint32_t input1_id,
827 uint32_t input2_id,
828 uint32_t output_id,
829 uint32_t flags);
830
831/// Define a 2-Input Minimum Node and add it to a Subgraph.
832///
833/// The 2-Input Minimum Node computes elementwise minimum of two tensor inputs with numpy broadcasting rules.
834///
835/// @param subgraph - a Subgraph object that will own the created Node.
836/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
837/// the @a subgraph with each dimension either equal to the corresponding dimension of the second
838/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
839/// that dimension.
840/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
841/// the @a subgraph with each dimension either equal to the corresponding dimension of the first
842/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
843/// that dimension.
844/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
845/// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
846/// of the two inputs.
847/// @param flags - binary features of the Minimum Node. No supported flags are currently defined.
848enum xnn_status xnn_define_minimum2(
849 xnn_subgraph_t subgraph,
850 uint32_t input1_id,
851 uint32_t input2_id,
852 uint32_t output_id,
853 uint32_t flags);
854
855/// Define a Squared Difference Node and add it to a Subgraph.
856///
857/// The Squared Difference Node computes elementwise squared difference of two tensor inputs with numpy broadcasting
858/// rules.
859///
860/// @param subgraph - a Subgraph object that will own the created Node.
861/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
862/// the @a subgraph with each dimension either equal to the corresponding dimension of the second
863/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
864/// that dimension.
865/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
866/// the @a subgraph with each dimension either equal to the corresponding dimension of the first
867/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
868/// that dimension.
869/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
870/// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
871/// of the two inputs.
872/// @param flags - binary features of the Squared Difference Node. No supported flags are currently defined.
873enum xnn_status xnn_define_squared_difference(
874 xnn_subgraph_t subgraph,
875 uint32_t input1_id,
876 uint32_t input2_id,
877 uint32_t output_id,
878 uint32_t flags);
879
Marat Dukhanab2946c2020-05-21 20:04:13 -0700880/// Define a Constant Pad Node with static padding specification and add it to a Subgraph.
881///
882/// @param subgraph - a Subgraph object that will own the created Node.
883/// @param pre_paddings - number of padding elements to insert before input elements for every dimension. This array
884/// must have as many elements as the number of dimensions in the input tensor.
885/// @param post_paddings - number of padding elements to insert after input elements for every dimension. This array
Marat Dukhan853bb7a2021-04-15 15:52:25 -0700886/// must have as many elements as the number of dimensions in the input tensor.
Marat Dukhanab2946c2020-05-21 20:04:13 -0700887/// @param padding_value - constant value used to initialize padding elements.
888/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
889/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
890/// shape must match the shape of the input tensor with padding.
Marat Dukhand27202d2020-07-09 23:43:40 -0700891/// @param flags - binary features of the Constant Pad Node. No supported flags are currently defined.
Marat Dukhanab2946c2020-05-21 20:04:13 -0700892enum xnn_status xnn_define_static_constant_pad(
893 xnn_subgraph_t subgraph,
894 const size_t* pre_paddings,
895 const size_t* post_paddings,
896 float padding_value,
897 uint32_t input_id,
898 uint32_t output_id,
899 uint32_t flags);
900
Marat Dukhand27202d2020-07-09 23:43:40 -0700901/// Define a Reshape Node with static shape specification and add it to a Subgraph.
902///
903/// @param subgraph - a Subgraph object that will own the created Node.
904/// @param num_dims - number of shape dimensions in the output tensor.
905/// @param new_shape - shape dimensions of the output tensor.
906/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
907/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
908/// shape must match the shape specified in @a new_shape.
909/// @param flags - binary features of the Reshape Node. No supported flags are currently defined.
910enum xnn_status xnn_define_static_reshape(
911 xnn_subgraph_t subgraph,
912 size_t num_dims,
913 const size_t* new_shape,
914 uint32_t input_id,
915 uint32_t output_id,
916 uint32_t flags);
917
Marat Dukhanaff24e22020-07-23 01:43:58 -0700918/// Define a 2D Resize Bilinear Node with static output height & width specification and add it to a Subgraph.
919///
920/// @param subgraph - a Subgraph object that will own the created Node.
921/// @param new_height - height dimension of the output tensor.
922/// @param new_width - width dimension of the output tensor.
923/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
Marat Dukhan853bb7a2021-04-15 15:52:25 -0700924/// with [N, H, W, C] dimensions.
Marat Dukhanaff24e22020-07-23 01:43:58 -0700925/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
926/// with [N, new_height, new_width, C] dimensions.
927/// @param flags - binary features of the 2D Resize Bilinear Node. The only currently supported values are
928/// XNN_FLAG_TENSORFLOW_LEGACY_MODE and XNN_FLAG_ALIGN_CORNERS, which are mutually exclusive.
929enum xnn_status xnn_define_static_resize_bilinear_2d(
930 xnn_subgraph_t subgraph,
931 size_t new_height,
932 size_t new_width,
933 uint32_t input_id,
934 uint32_t output_id,
935 uint32_t flags);
936
Marat Dukhan2fd2ba12020-02-10 13:14:45 -0800937/// Define a PReLU (Parametric ReLU) Node and add it to a Subgraph.
938///
939/// @param subgraph - a Subgraph object that will own the created Node.
940/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
Marat Dukhan853bb7a2021-04-15 15:52:25 -0700941/// with [N, H, W, channels] dimensions.
Marat Dukhan2fd2ba12020-02-10 13:14:45 -0800942/// @param slope_id - Value ID for the slope tensor. The slope tensor must be a 1D tensor defined in the @a subgraph with
943/// [channels] dimensions.
944/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
945/// with [N, H, W, channels] dimensions.
946/// @param flags - binary features of the PReLU Node. No supported flags are currently defined.
947enum xnn_status xnn_define_prelu(
948 xnn_subgraph_t subgraph,
949 uint32_t input_id,
950 uint32_t slope_id,
951 uint32_t output_id,
952 uint32_t flags);
953
Marat Dukhan5fab4092020-06-10 01:28:28 -0700954/// Define an Abs Node and add it to a Subgraph.
955///
956/// @param subgraph - a Subgraph object that will own the created Node.
957/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
958/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
959/// shape must match the shape of the input tensor.
960/// @param flags - binary features of the Abs Node. No supported flags are currently defined.
961enum xnn_status xnn_define_abs(
962 xnn_subgraph_t subgraph,
963 uint32_t input_id,
964 uint32_t output_id,
965 uint32_t flags);
966
967/// Define a Bankers' Rounding Node and add it to a Subgraph.
968///
969/// @param subgraph - a Subgraph object that will own the created Node.
970/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
971/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
972/// shape must match the shape of the input tensor.
973/// @param flags - binary features of the Bankers' Rounding Node. No supported flags are currently defined.
974enum xnn_status xnn_define_bankers_rounding(
975 xnn_subgraph_t subgraph,
976 uint32_t input_id,
977 uint32_t output_id,
978 uint32_t flags);
979
980/// Define a Ceiling Node and add it to a Subgraph.
981///
982/// @param subgraph - a Subgraph object that will own the created Node.
983/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
984/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
985/// shape must match the shape of the input tensor.
986/// @param flags - binary features of the Ceiling Node. No supported flags are currently defined.
987enum xnn_status xnn_define_ceiling(
988 xnn_subgraph_t subgraph,
989 uint32_t input_id,
990 uint32_t output_id,
991 uint32_t flags);
992
Marat Dukhan52bd86f2020-02-11 18:21:51 -0800993/// Define a Clamp Node and add it to a Subgraph.
994///
995/// @param subgraph - a Subgraph object that will own the created Node.
996/// @param output_min - lower bound for clipping output values.
997/// @param output_max - upper bound for clipping output values.
998/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
999/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1000/// shape must match the shape of the input tensor.
1001/// @param flags - binary features of the Clamp Node. No supported flags are currently defined.
1002enum xnn_status xnn_define_clamp(
1003 xnn_subgraph_t subgraph,
1004 float output_min,
1005 float output_max,
1006 uint32_t input_id,
1007 uint32_t output_id,
1008 uint32_t flags);
1009
/// Define an ELU (Exponential Linear Unit) Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param alpha - scale factor for negative output elements.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the ELU Node. No supported flags are currently defined.
enum xnn_status xnn_define_elu(
  xnn_subgraph_t subgraph,
  float alpha,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Floor (rounding toward negative infinity) Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Floor Node. No supported flags are currently defined.
enum xnn_status xnn_define_floor(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a HardSwish Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the HardSwish Node. No supported flags are currently defined.
enum xnn_status xnn_define_hardswish(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Leaky ReLU Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param negative_slope - scale factor for negative input elements.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Leaky ReLU Node. No supported flags are currently defined.
enum xnn_status xnn_define_leaky_relu(
  xnn_subgraph_t subgraph,
  float negative_slope,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Negate Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Negate Node. No supported flags are currently defined.
enum xnn_status xnn_define_negate(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Sigmoid Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Sigmoid Node. No supported flags are currently defined.
enum xnn_status xnn_define_sigmoid(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a SoftMax Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph, and have at
///                   least one dimension.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the SoftMax Node. No supported flags are currently defined.
enum xnn_status xnn_define_softmax(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Square (elementwise x * x) Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Square Node. No supported flags are currently defined.
enum xnn_status xnn_define_square(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Square Root Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Square Root Node. No supported flags are currently defined.
enum xnn_status xnn_define_square_root(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);
1131
/// Runtime is a combination of an execution plan for subgraph Nodes and a memory manager for subgraph Values.
typedef struct xnn_runtime* xnn_runtime_t;

/// Create a Runtime object from a subgraph.
///
/// @param subgraph - a Subgraph object with all Values and Nodes that would be handled by the runtime. No Values or
///                   Nodes can be added to the runtime once it is constructed.
/// @param threadpool - the thread pool to be used for parallelisation of computations in the runtime. If the thread
///                     pool is NULL, the computation would run on the caller thread without parallelization.
/// @param flags - binary features of the runtime. The only currently supported values are XNN_FLAG_SPARSE_INFERENCE,
///                XNN_FLAG_FP16_INFERENCE, and XNN_FLAG_YIELD_WORKERS. If XNN_FLAG_YIELD_WORKERS is specified, worker
///                threads would be yielded to the system scheduler after processing the last operator in the Runtime.
/// @param runtime_out - pointer to the variable that will be initialized with a handle to the Runtime object upon
///                      successful return. Once constructed, the Runtime object is independent of the Subgraph object
///                      used to create it.
enum xnn_status xnn_create_runtime_v2(
  xnn_subgraph_t subgraph,
  pthreadpool_t threadpool,
  uint32_t flags,
  xnn_runtime_t* runtime_out);

/// Create a Runtime object from a subgraph with default runtime parameters.
///
/// NOTE(review): presumably equivalent to @ref xnn_create_runtime_v2 with a NULL thread pool and no flags --
/// confirm against the implementation.
///
/// @param subgraph - a Subgraph object with all Values and Nodes that would be handled by the runtime.
/// @param runtime_out - pointer to the variable that will be initialized with a handle to the Runtime object upon
///                      successful return.
enum xnn_status xnn_create_runtime(
  xnn_subgraph_t subgraph,
  xnn_runtime_t* runtime_out);

/// Binding of an external (input or output) Value to the memory backing it. Used with @ref xnn_setup_runtime.
struct xnn_external_value {
  uint32_t id;  // Value ID of the external input or output in the runtime.
  void* data;   // pointer to the tensor data for this Value.
};

/// Setup data pointers for external inputs and outputs in a Runtime object.
///
/// @param runtime - a Runtime object created with @ref xnn_create_runtime or @ref xnn_create_runtime_v2.
/// @param num_external_values - the number of external inputs and outputs specified in this call. This number must
///                              match the number of external inputs and outputs in the runtime, i.e. all external
///                              inputs and outputs in the runtime must be specified in one call.
/// @param external_values - array with location information for all external inputs and outputs in the runtime.
enum xnn_status xnn_setup_runtime(
  xnn_runtime_t runtime,
  size_t num_external_values,
  const struct xnn_external_value* external_values);

/// Execute forward pass for all operators in the runtime.
///
/// @param runtime - the Runtime object with the execution plan to invoke.
enum xnn_status xnn_invoke_runtime(
  xnn_runtime_t runtime);

/// Destroy a Runtime object, as well as operators and memory associated with it.
///
/// @param runtime - the Runtime object to destroy.
enum xnn_status xnn_delete_runtime(
  xnn_runtime_t runtime);

/// Operator is an opaque handle to a single configured compute operation (created and setup via the
/// xnn_create_* / xnn_setup_* function pairs below).
typedef struct xnn_operator* xnn_operator_t;

/// Run an operator that was previously configured with its matching xnn_setup_* function.
///
/// @param op - the operator to run.
/// @param threadpool - thread pool for parallelization of the computation.
///                     NOTE(review): presumably a NULL thread pool runs on the calling thread, matching the
///                     @ref xnn_create_runtime_v2 convention -- confirm.
enum xnn_status xnn_run_operator(
  xnn_operator_t op,
  pthreadpool_t threadpool);

/// Destroy an operator object and release the resources associated with it.
///
/// @param op - the operator to destroy.
enum xnn_status xnn_delete_operator(
  xnn_operator_t op);
Marat Dukhand6209722019-10-07 12:54:25 -07001194
#ifndef XNN_NO_F32_OPERATORS

/// Create an Abs (elementwise absolute value) operator for F32 tensors in NC layout.
enum xnn_status xnn_create_abs_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* abs_op_out);

/// Setup an Abs operator with a batch size and input/output pointers; run it with xnn_run_operator.
enum xnn_status xnn_setup_abs_nc_f32(
  xnn_operator_t abs_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create an Add operator over N-dimensional F32 tensors; outputs are clamped to [output_min, output_max].
enum xnn_status xnn_create_add_nd_f32(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* add_op_out);

/// Setup an Add operator with the shapes and data pointers of both inputs and the output.
enum xnn_status xnn_setup_add_nd_f32(
  xnn_operator_t add_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create a 2D ArgMax Pooling operator for F32 tensors in NHWC layout; its setup function also produces the
/// indices of the maxima (see the uint32_t* index argument below).
enum xnn_status xnn_create_argmax_pooling2d_nhwc_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* argmax_pooling_op_out);

/// Setup an ArgMax Pooling operator; @a index receives the argmax indices alongside the pooled @a output values.
enum xnn_status xnn_setup_argmax_pooling2d_nhwc_f32(
  xnn_operator_t argmax_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  uint32_t* index,
  pthreadpool_t threadpool);

/// Create a 2D Average Pooling operator for F32 tensors in NHWC layout; outputs are clamped to
/// [output_min, output_max].
enum xnn_status xnn_create_average_pooling2d_nhwc_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* average_pooling_op_out);

/// Setup an Average Pooling operator for a batch of images.
enum xnn_status xnn_setup_average_pooling2d_nhwc_f32(
  xnn_operator_t average_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Bankers' Rounding (round-half-to-even) operator for F32 tensors in NC layout.
enum xnn_status xnn_create_bankers_rounding_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* rounding_op_out);

/// Setup a Bankers' Rounding operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_bankers_rounding_nc_f32(
  xnn_operator_t rounding_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Ceiling (rounding toward positive infinity) operator for F32 tensors in NC layout.
enum xnn_status xnn_create_ceiling_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* ceiling_op_out);

/// Setup a Ceiling operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_ceiling_nc_f32(
  xnn_operator_t ceiling_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Clamp operator for F32 tensors in NC layout: limits every element to [output_min, output_max].
enum xnn_status xnn_create_clamp_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* clamp_op_out);

/// Setup a Clamp operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_clamp_nc_f32(
  xnn_operator_t clamp_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -07001320
/// Create a 2D Convolution operator for F32 tensors in NHWC layout. Use XNN_FLAG_DEPTHWISE_CONVOLUTION
/// (declared earlier in this header) for depthwise filters.
enum xnn_status xnn_create_convolution2d_nhwc_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  const float* kernel,
  const float* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* convolution_op_out);

/// Setup a 2D Convolution operator for a batch of images.
enum xnn_status xnn_setup_convolution2d_nhwc_f32(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a 2D Deconvolution (transposed convolution) operator for F32 tensors in NHWC layout.
enum xnn_status xnn_create_deconvolution2d_nhwc_f32(
  uint32_t output_padding_top,
  uint32_t output_padding_right,
  uint32_t output_padding_bottom,
  uint32_t output_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  const float* kernel,
  const float* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* deconvolution_op_out);

/// Setup a 2D Deconvolution operator; @a adjustment_height and @a adjustment_width adjust the computed output size.
enum xnn_status xnn_setup_deconvolution2d_nhwc_f32(
  xnn_operator_t deconvolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  uint32_t adjustment_height,
  uint32_t adjustment_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Divide operator over N-dimensional F32 tensors; outputs are clamped to [output_min, output_max].
enum xnn_status xnn_create_divide_nd_f32(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* divide_op_out);

/// Setup a Divide operator with the shapes and data pointers of both inputs and the output.
enum xnn_status xnn_setup_divide_nd_f32(
  xnn_operator_t divide_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create an ELU operator for F32 tensors in NC layout; @a alpha scales negative output elements.
enum xnn_status xnn_create_elu_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float alpha,
  uint32_t flags,
  xnn_operator_t* elu_op_out);

/// Setup an ELU operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_elu_nc_f32(
  xnn_operator_t elu_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Floor (rounding toward negative infinity) operator for F32 tensors in NC layout.
enum xnn_status xnn_create_floor_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* floor_op_out);

/// Setup a Floor operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_floor_nc_f32(
  xnn_operator_t floor_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
1432
/// Create a Fully Connected operator for F32 tensors in NC layout; outputs are clamped to
/// [output_min, output_max]. The weight layout can be altered with the transposed-weights flag
/// declared earlier in this header.
enum xnn_status xnn_create_fully_connected_nc_f32(
  size_t input_channels,
  size_t output_channels,
  size_t input_stride,
  size_t output_stride,
  const float* kernel,
  const float* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* fully_connected_op_out);

/// Setup a Fully Connected operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_fully_connected_nc_f32(
  xnn_operator_t fully_connected_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Global Average Pooling operator for F32 tensors in NWC layout (pools across the W dimension);
/// outputs are clamped to [output_min, output_max].
enum xnn_status xnn_create_global_average_pooling_nwc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

/// Setup a Global Average Pooling (NWC) operator with a batch size, width, and input/output pointers.
enum xnn_status xnn_setup_global_average_pooling_nwc_f32(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a HardSwish operator for F32 tensors in NC layout.
enum xnn_status xnn_create_hardswish_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* hardswish_op_out);

/// Setup a HardSwish operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_hardswish_nc_f32(
  xnn_operator_t hardswish_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Leaky ReLU operator for F32 tensors in NC layout; negative inputs are scaled by @a negative_slope.
enum xnn_status xnn_create_leaky_relu_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float negative_slope,
  uint32_t flags,
  xnn_operator_t* leaky_relu_op_out);

/// Setup a Leaky ReLU operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_leaky_relu_nc_f32(
  xnn_operator_t leaky_relu_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
1497
/// Create a 2D Max Pooling operator for F32 tensors in NHWC layout; outputs are clamped to
/// [output_min, output_max].
enum xnn_status xnn_create_max_pooling2d_nhwc_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* max_pooling_op_out);

/// Setup a Max Pooling operator for a batch of images.
enum xnn_status xnn_setup_max_pooling2d_nhwc_f32(
  xnn_operator_t max_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Maximum (elementwise max) operator over N-dimensional F32 tensors.
enum xnn_status xnn_create_maximum_nd_f32(
  uint32_t flags,
  xnn_operator_t* maximum_op_out);

/// Setup a Maximum operator with the shapes and data pointers of both inputs and the output.
enum xnn_status xnn_setup_maximum_nd_f32(
  xnn_operator_t maximum_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create a Minimum (elementwise min) operator over N-dimensional F32 tensors.
enum xnn_status xnn_create_minimum_nd_f32(
  uint32_t flags,
  xnn_operator_t* minimum_op_out);

/// Setup a Minimum operator with the shapes and data pointers of both inputs and the output.
enum xnn_status xnn_setup_minimum_nd_f32(
  xnn_operator_t minimum_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create a Multiply operator over N-dimensional F32 tensors; outputs are clamped to [output_min, output_max].
enum xnn_status xnn_create_multiply_nd_f32(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* multiply_op_out);

/// Setup a Multiply operator with the shapes and data pointers of both inputs and the output.
enum xnn_status xnn_setup_multiply_nd_f32(
  xnn_operator_t multiply_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create a Negate operator for F32 tensors in NC layout.
enum xnn_status xnn_create_negate_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* negate_op_out);

/// Setup a Negate operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_negate_nc_f32(
  xnn_operator_t negate_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
1586
/// Create a PReLU (parametric ReLU) operator for F32 tensors in NC layout; @a negative_slope points to an array
/// of slope values (NOTE(review): presumably one per channel -- confirm against the implementation).
enum xnn_status xnn_create_prelu_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  const float* negative_slope,
  uint32_t flags,
  xnn_operator_t* prelu_op_out);

/// Setup a PReLU operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_prelu_nc_f32(
  xnn_operator_t prelu_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a 2D Bilinear Resize operator for F32 tensors in NCHW layout.
enum xnn_status xnn_create_resize_bilinear2d_nchw_f32(
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* resize_op_out);

/// Setup a Bilinear Resize (NCHW) operator with input and output spatial dimensions.
enum xnn_status xnn_setup_resize_bilinear2d_nchw_f32(
  xnn_operator_t resize_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a 2D Bilinear Resize operator for F32 tensors in NHWC layout.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_f32(
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* resize_op_out);

/// Setup a Bilinear Resize (NHWC) operator with input and output spatial dimensions.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f32(
  xnn_operator_t resize_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
Marat Dukhan69722492019-11-11 19:55:50 -08001637
/// Create a Sigmoid operator for F32 tensors in NC layout.
enum xnn_status xnn_create_sigmoid_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* sigmoid_op_out);

/// Setup a Sigmoid operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_sigmoid_nc_f32(
  xnn_operator_t sigmoid_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a SoftMax operator for F32 tensors in NC layout.
enum xnn_status xnn_create_softmax_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* softmax_op_out);

/// Setup a SoftMax operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_softmax_nc_f32(
  xnn_operator_t softmax_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Square (elementwise x * x) operator for F32 tensors in NC layout.
enum xnn_status xnn_create_square_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* square_op_out);

/// Setup a Square operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_square_nc_f32(
  xnn_operator_t square_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Square Root operator for F32 tensors in NC layout.
enum xnn_status xnn_create_square_root_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* sqrt_op_out);

/// Setup a Square Root operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_square_root_nc_f32(
  xnn_operator_t sqrt_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Squared Difference operator over N-dimensional F32 tensors.
enum xnn_status xnn_create_squared_difference_nd_f32(
  uint32_t flags,
  xnn_operator_t* squared_difference_op_out);

/// Setup a Squared Difference operator with the shapes and data pointers of both inputs and the output.
enum xnn_status xnn_setup_squared_difference_nd_f32(
  xnn_operator_t squared_difference_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create a Subtract operator over N-dimensional F32 tensors; outputs are clamped to [output_min, output_max].
enum xnn_status xnn_create_subtract_nd_f32(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* subtract_op_out);

/// Setup a Subtract operator with the shapes and data pointers of both inputs and the output.
enum xnn_status xnn_setup_subtract_nd_f32(
  xnn_operator_t subtract_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create a Truncation (rounding toward zero) operator for F32 tensors in NC layout.
enum xnn_status xnn_create_truncation_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* truncation_op_out);

/// Setup a Truncation operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_truncation_nc_f32(
  xnn_operator_t truncation_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
1739
#ifndef XNN_NO_NCHW_OPERATORS

/// Create a 2D Convolution operator for F32 tensors in NCHW layout.
/// Parameters mirror those of xnn_create_convolution2d_nhwc_f32.
enum xnn_status xnn_create_convolution2d_nchw_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  const float* kernel,
  const float* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* convolution_op_out);

/// Setup a 2D Convolution (NCHW) operator for a batch of images.
enum xnn_status xnn_setup_convolution2d_nchw_f32(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Global Average Pooling operator for F32 tensors in NCW layout (pools across the W dimension);
/// outputs are clamped to [output_min, output_max].
enum xnn_status xnn_create_global_average_pooling_ncw_f32(
  size_t channels,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

/// Setup a Global Average Pooling (NCW) operator with a batch size, width, and input/output pointers.
enum xnn_status xnn_setup_global_average_pooling_ncw_f32(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_NCHW_OPERATORS

#endif  // XNN_NO_F32_OPERATORS
1792
#ifndef XNN_NO_X32_OPERATORS

/// Create a Channel Shuffle operator for tensors with 32-bit elements in NC layout.
enum xnn_status xnn_create_channel_shuffle_nc_x32(
  size_t groups,
  size_t group_channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* channel_shuffle_op_out);

/// Setup a Channel Shuffle operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_channel_shuffle_nc_x32(
  xnn_operator_t channel_shuffle_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a Constant Pad operator for N-dimensional tensors with 32-bit elements; @a padding_value points to
/// the 32-bit value used to fill padded elements.
enum xnn_status xnn_create_constant_pad_nd_x32(
  const void* padding_value,
  uint32_t flags,
  xnn_operator_t* constant_pad_op_out);

/// Setup a Constant Pad operator with per-dimension pre- and post-padding amounts.
enum xnn_status xnn_setup_constant_pad_nd_x32(
  xnn_operator_t constant_pad_op,
  size_t num_dims,
  const size_t* input_shape,
  const size_t* pre_padding,
  const size_t* post_padding,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a Copy operator for tensors with 32-bit elements in NC layout.
enum xnn_status xnn_create_copy_nc_x32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* copy_op_out);

/// Setup a Copy operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_copy_nc_x32(
  xnn_operator_t copy_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a Depth-to-Space operator for tensors with 32-bit elements in NHWC layout.
enum xnn_status xnn_create_depth_to_space_nhwc_x32(
  size_t output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  uint32_t block_size,
  uint32_t flags,
  xnn_operator_t* depth_to_space_op_out);

/// Setup a Depth-to-Space (NHWC) operator for a batch of images.
enum xnn_status xnn_setup_depth_to_space_nhwc_x32(
  xnn_operator_t depth_to_space_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a Depth-to-Space operator that consumes NCHW input and produces NHWC output
/// (tensors with 32-bit elements).
enum xnn_status xnn_create_depth_to_space_nchw2nhwc_x32(
  size_t output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  uint32_t block_size,
  uint32_t flags,
  xnn_operator_t* depth_to_space_op_out);
1863
1864enum xnn_status xnn_setup_depth_to_space_nchw2nhwc_x32(
1865 xnn_operator_t depth_to_space_op,
1866 size_t batch_size,
1867 size_t input_height,
1868 size_t input_width,
Marat Dukhanb4ac61d2020-11-12 12:08:30 -08001869 const void* input,
1870 void* output,
1871 pthreadpool_t threadpool);
1872
Marat Dukhand6209722019-10-07 12:54:25 -07001873enum xnn_status xnn_create_unpooling2d_nhwc_x32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001874 uint32_t input_padding_top,
1875 uint32_t input_padding_right,
1876 uint32_t input_padding_bottom,
1877 uint32_t input_padding_left,
1878 uint32_t pooling_height,
1879 uint32_t pooling_width,
1880 size_t channels,
1881 size_t input_pixel_stride,
1882 size_t output_pixel_stride,
1883 uint32_t flags,
1884 xnn_operator_t* unpooling_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -07001885
1886enum xnn_status xnn_setup_unpooling2d_nhwc_x32(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001887 xnn_operator_t unpooling_op,
1888 size_t batch_size,
1889 size_t input_height,
1890 size_t input_width,
1891 const void* input,
1892 const uint32_t* index,
1893 void* output,
1894 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -07001895
1896#endif // XNN_NO_X32_OPERATORS
1897
Frank Barchard0ccccf12020-06-22 15:21:45 -07001898#ifndef XNN_NO_F16_OPERATORS
1899
Frank Barchard01898c02020-06-23 21:49:50 -07001900enum xnn_status xnn_create_add_nd_f16(
1901 float output_min,
1902 float output_max,
1903 uint32_t flags,
1904 xnn_operator_t* add_op_out);
1905
1906enum xnn_status xnn_setup_add_nd_f16(
1907 xnn_operator_t add_op,
1908 size_t num_input1_dims,
1909 const size_t* input1_shape,
1910 size_t num_input2_dims,
1911 const size_t* input2_shape,
1912 const void* input1,
1913 const void* input2,
1914 void* output,
1915 pthreadpool_t threadpool);
1916
Frank Barchard49b4dcc2020-06-26 14:07:19 -07001917enum xnn_status xnn_create_convolution2d_nhwc_f16(
1918 uint32_t input_padding_top,
1919 uint32_t input_padding_right,
1920 uint32_t input_padding_bottom,
1921 uint32_t input_padding_left,
1922 uint32_t kernel_height,
1923 uint32_t kernel_width,
1924 uint32_t subsampling_height,
1925 uint32_t subsampling_width,
1926 uint32_t dilation_height,
1927 uint32_t dilation_width,
1928 uint32_t groups,
1929 size_t group_input_channels,
1930 size_t group_output_channels,
1931 size_t input_channel_stride,
1932 size_t output_channel_stride,
1933 const void* kernel,
1934 const void* bias,
1935 float output_min,
1936 float output_max,
1937 uint32_t flags,
1938 xnn_operator_t* convolution_op_out);
1939
1940enum xnn_status xnn_setup_convolution2d_nhwc_f16(
1941 xnn_operator_t convolution_op,
1942 size_t batch_size,
1943 size_t input_height,
1944 size_t input_width,
1945 const void* input,
1946 void* output,
1947 pthreadpool_t threadpool);
1948
Marat Dukhanddb3d162021-10-25 17:05:51 -07001949enum xnn_status xnn_create_fully_connected_nc_f16(
1950 size_t input_channels,
1951 size_t output_channels,
1952 size_t input_stride,
1953 size_t output_stride,
1954 const void* kernel,
1955 const void* bias,
1956 float output_min,
1957 float output_max,
1958 uint32_t flags,
1959 xnn_operator_t* fully_connected_op_out);
1960
1961enum xnn_status xnn_setup_fully_connected_nc_f16(
1962 xnn_operator_t fully_connected_op,
1963 size_t batch_size,
1964 const void* input,
1965 void* output,
1966 pthreadpool_t threadpool);
1967
Frank Barchard0ccccf12020-06-22 15:21:45 -07001968enum xnn_status xnn_create_global_average_pooling_nwc_f16(
1969 size_t channels,
1970 size_t input_stride,
1971 size_t output_stride,
1972 float output_min,
1973 float output_max,
1974 uint32_t flags,
1975 xnn_operator_t* global_average_pooling_op_out);
1976
1977enum xnn_status xnn_setup_global_average_pooling_nwc_f16(
1978 xnn_operator_t global_average_pooling_op,
1979 size_t batch_size,
1980 size_t width,
1981 const void* input,
1982 void* output,
1983 pthreadpool_t threadpool);
1984
Frank Barcharda96948e2020-09-11 15:34:18 -07001985enum xnn_status xnn_create_hardswish_nc_f16(
1986 size_t channels,
1987 size_t input_stride,
1988 size_t output_stride,
1989 uint32_t flags,
1990 xnn_operator_t* hardswish_op_out);
1991
1992enum xnn_status xnn_setup_hardswish_nc_f16(
1993 xnn_operator_t hardswish_op,
1994 size_t batch_size,
1995 const void* input,
1996 void* output,
1997 pthreadpool_t threadpool);
1998
Marat Dukhan5756a922022-02-04 01:55:53 -08001999enum xnn_status xnn_create_max_pooling2d_nhwc_f16(
2000 uint32_t input_padding_top,
2001 uint32_t input_padding_right,
2002 uint32_t input_padding_bottom,
2003 uint32_t input_padding_left,
2004 uint32_t pooling_height,
2005 uint32_t pooling_width,
2006 uint32_t stride_height,
2007 uint32_t stride_width,
2008 uint32_t dilation_height,
2009 uint32_t dilation_width,
2010 size_t channels,
2011 size_t input_pixel_stride,
2012 size_t output_pixel_stride,
2013 float output_min,
2014 float output_max,
2015 uint32_t flags,
2016 xnn_operator_t* max_pooling_op_out);
2017
2018enum xnn_status xnn_setup_max_pooling2d_nhwc_f16(
2019 xnn_operator_t max_pooling_op,
2020 size_t batch_size,
2021 size_t input_height,
2022 size_t input_width,
2023 const void* input,
2024 void* output,
2025 pthreadpool_t threadpool);
2026
Marat Dukhand04e2dd2020-09-13 21:19:39 -07002027enum xnn_status xnn_create_multiply_nd_f16(
2028 float output_min,
2029 float output_max,
2030 uint32_t flags,
2031 xnn_operator_t* multiply_op_out);
2032
2033enum xnn_status xnn_setup_multiply_nd_f16(
2034 xnn_operator_t multiply_op,
2035 size_t num_input1_dims,
2036 const size_t* input1_shape,
2037 size_t num_input2_dims,
2038 const size_t* input2_shape,
2039 const void* input1,
2040 const void* input2,
2041 void* output,
2042 pthreadpool_t threadpool);
2043
Marat Dukhan0a756b52022-02-03 23:08:50 -08002044enum xnn_status xnn_create_prelu_nc_f16(
2045 size_t channels,
2046 size_t input_stride,
2047 size_t output_stride,
2048 const void* negative_slope,
2049 uint32_t flags,
2050 xnn_operator_t* prelu_op_out);
2051
2052enum xnn_status xnn_setup_prelu_nc_f16(
2053 xnn_operator_t prelu_op,
2054 size_t batch_size,
2055 const void* input,
2056 void* output,
2057 pthreadpool_t threadpool);
2058
Frank Barchard0ccccf12020-06-22 15:21:45 -07002059#endif // XNN_NO_F16_OPERATORS
2060
Marat Dukhan6b45a7f2022-02-03 19:21:41 -08002061#ifndef XNN_NO_X16_OPERATORS
2062
2063enum xnn_status xnn_create_constant_pad_nd_x16(
2064 const void* padding_value,
2065 uint32_t flags,
2066 xnn_operator_t* constant_pad_op_out);
2067
2068enum xnn_status xnn_setup_constant_pad_nd_x16(
2069 xnn_operator_t constant_pad_op,
2070 size_t num_dims,
2071 const size_t* input_shape,
2072 const size_t* pre_padding,
2073 const size_t* post_padding,
2074 const void* input,
2075 void* output,
2076 pthreadpool_t threadpool);
2077
Marat Dukhan2bd2bd22022-02-04 03:34:32 -08002078enum xnn_status xnn_create_copy_nc_x16(
2079 size_t channels,
2080 size_t input_stride,
2081 size_t output_stride,
2082 uint32_t flags,
2083 xnn_operator_t* copy_op_out);
2084
2085enum xnn_status xnn_setup_copy_nc_x16(
2086 xnn_operator_t copy_op,
2087 size_t batch_size,
2088 const void* input,
2089 void* output,
2090 pthreadpool_t threadpool);
2091
Marat Dukhan6b45a7f2022-02-03 19:21:41 -08002092#endif // XNN_NO_X16_OPERATORS
2093
Marat Dukhan97262462021-06-18 16:14:17 -07002094#ifndef XNN_NO_QC8_OPERATORS
2095
2096enum xnn_status xnn_create_convolution2d_nhwc_qc8(
2097 uint32_t input_padding_top,
2098 uint32_t input_padding_right,
2099 uint32_t input_padding_bottom,
2100 uint32_t input_padding_left,
2101 uint32_t kernel_height,
2102 uint32_t kernel_width,
2103 uint32_t subsampling_height,
2104 uint32_t subsampling_width,
2105 uint32_t dilation_height,
2106 uint32_t dilation_width,
2107 uint32_t groups,
2108 size_t group_input_channels,
2109 size_t group_output_channels,
2110 size_t input_channel_stride,
2111 size_t output_channel_stride,
2112 int8_t input_zero_point,
2113 float input_scale,
2114 const float* kernel_scale,
2115 const int8_t* kernel,
2116 const int32_t* bias,
2117 int8_t output_zero_point,
2118 float output_scale,
2119 int8_t output_min,
2120 int8_t output_max,
2121 uint32_t flags,
2122 xnn_operator_t* convolution_op_out);
2123
2124enum xnn_status xnn_setup_convolution2d_nhwc_qc8(
2125 xnn_operator_t convolution_op,
2126 size_t batch_size,
2127 size_t input_height,
2128 size_t input_width,
2129 const int8_t* input,
2130 int8_t* output,
2131 pthreadpool_t threadpool);
2132
2133#endif // XNN_NO_QC8_OPERATORS
2134
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07002135#ifndef XNN_NO_QS8_OPERATORS
2136
Marat Dukhanff209482020-09-03 14:26:53 -07002137enum xnn_status xnn_create_add_nd_qs8(
2138 int8_t input1_zero_point,
2139 float input1_scale,
2140 int8_t input2_zero_point,
2141 float input2_scale,
2142 int8_t output_zero_point,
2143 float output_scale,
2144 int8_t output_min,
2145 int8_t output_max,
2146 uint32_t flags,
2147 xnn_operator_t* add_op_out);
2148
2149enum xnn_status xnn_setup_add_nd_qs8(
2150 xnn_operator_t add_op,
2151 size_t num_input1_dims,
2152 const size_t* input1_shape,
2153 size_t num_input2_dims,
2154 const size_t* input2_shape,
2155 const int8_t* input1,
2156 const int8_t* input2,
2157 int8_t* output,
2158 pthreadpool_t threadpool);
2159
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07002160enum xnn_status xnn_create_convolution2d_nhwc_qs8(
2161 uint32_t input_padding_top,
2162 uint32_t input_padding_right,
2163 uint32_t input_padding_bottom,
2164 uint32_t input_padding_left,
2165 uint32_t kernel_height,
2166 uint32_t kernel_width,
2167 uint32_t subsampling_height,
2168 uint32_t subsampling_width,
2169 uint32_t dilation_height,
2170 uint32_t dilation_width,
2171 uint32_t groups,
2172 size_t group_input_channels,
2173 size_t group_output_channels,
2174 size_t input_channel_stride,
2175 size_t output_channel_stride,
2176 int8_t input_zero_point,
2177 float input_scale,
2178 float kernel_scale,
2179 const int8_t* kernel,
2180 const int32_t* bias,
2181 int8_t output_zero_point,
2182 float output_scale,
2183 int8_t output_min,
2184 int8_t output_max,
2185 uint32_t flags,
2186 xnn_operator_t* convolution_op_out);
2187
2188enum xnn_status xnn_setup_convolution2d_nhwc_qs8(
2189 xnn_operator_t convolution_op,
2190 size_t batch_size,
2191 size_t input_height,
2192 size_t input_width,
2193 const int8_t* input,
2194 int8_t* output,
2195 pthreadpool_t threadpool);
2196
Marat Dukhanbea849a2021-07-30 16:25:30 -07002197enum xnn_status xnn_create_deconvolution2d_nhwc_qs8(
2198 uint32_t output_padding_top,
2199 uint32_t output_padding_right,
2200 uint32_t output_padding_bottom,
2201 uint32_t output_padding_left,
2202 uint32_t kernel_height,
2203 uint32_t kernel_width,
2204 uint32_t stride_height,
2205 uint32_t stride_width,
2206 uint32_t dilation_height,
2207 uint32_t dilation_width,
2208 uint32_t groups,
2209 size_t group_input_channels,
2210 size_t group_output_channels,
2211 size_t input_pixel_stride,
2212 size_t output_pixel_stride,
2213 int8_t input_zero_point,
2214 float input_scale,
2215 float kernel_scale,
2216 const int8_t* kernel,
2217 const int32_t* bias,
2218 int8_t output_zero_point,
2219 float output_scale,
2220 int8_t output_min,
2221 int8_t output_max,
2222 uint32_t flags,
2223 xnn_operator_t* deconvolution_op_out);
2224
2225enum xnn_status xnn_setup_deconvolution2d_nhwc_qs8(
2226 xnn_operator_t deconvolution_op,
2227 size_t batch_size,
2228 size_t input_height,
2229 size_t input_width,
2230 uint32_t adjustment_height,
2231 uint32_t adjustment_width,
2232 const int8_t* input,
2233 int8_t* output,
2234 pthreadpool_t threadpool);
2235
Marat Dukhaneec00522021-09-15 00:01:41 -07002236enum xnn_status xnn_create_elu_nc_qs8(
2237 size_t channels,
2238 size_t input_stride,
2239 size_t output_stride,
2240 float alpha,
2241 int8_t input_zero_point,
2242 float input_scale,
2243 int8_t output_zero_point,
2244 float output_scale,
2245 int8_t output_min,
2246 int8_t output_max,
2247 uint32_t flags,
2248 xnn_operator_t* elu_op_out);
2249
2250enum xnn_status xnn_setup_elu_nc_qs8(
2251 xnn_operator_t elu_op,
2252 size_t batch_size,
2253 const int8_t* input,
2254 int8_t* output,
2255 pthreadpool_t threadpool);
2256
Marat Dukhand23cb6e2021-04-01 01:18:58 -07002257enum xnn_status xnn_create_fully_connected_nc_qs8(
2258 size_t input_channels,
2259 size_t output_channels,
2260 size_t input_stride,
2261 size_t output_stride,
2262 int8_t input_zero_point,
2263 float input_scale,
2264 float kernel_scale,
2265 const int8_t* kernel,
2266 const int32_t* bias,
2267 int8_t output_zero_point,
2268 float output_scale,
2269 int8_t output_min,
2270 int8_t output_max,
2271 uint32_t flags,
2272 xnn_operator_t* fully_connected_op_out);
2273
2274enum xnn_status xnn_setup_fully_connected_nc_qs8(
2275 xnn_operator_t fully_connected_op,
2276 size_t batch_size,
2277 const int8_t* input,
2278 int8_t* output,
2279 pthreadpool_t threadpool);
2280
Marat Dukhan9e0b5392020-08-07 02:29:34 -07002281enum xnn_status xnn_create_global_average_pooling_nwc_qs8(
2282 size_t channels,
2283 size_t input_stride,
2284 size_t output_stride,
2285 int8_t input_zero_point,
2286 float input_scale,
2287 int8_t output_zero_point,
2288 float output_scale,
2289 int8_t output_min,
2290 int8_t output_max,
2291 uint32_t flags,
2292 xnn_operator_t* global_average_pooling_op_out);
2293
2294enum xnn_status xnn_setup_global_average_pooling_nwc_qs8(
2295 xnn_operator_t global_average_pooling_op,
2296 size_t batch_size,
2297 size_t width,
2298 const int8_t* input,
2299 int8_t* output,
2300 pthreadpool_t threadpool);
2301
Marat Dukhan0853b8a2021-08-03 01:01:53 -07002302enum xnn_status xnn_create_multiply_nd_qs8(
2303 int8_t input1_zero_point,
2304 float input1_scale,
2305 int8_t input2_zero_point,
2306 float input2_scale,
2307 int8_t output_zero_point,
2308 float output_scale,
2309 int8_t output_min,
2310 int8_t output_max,
2311 uint32_t flags,
2312 xnn_operator_t* multiply_op_out);
2313
2314enum xnn_status xnn_setup_multiply_nd_qs8(
2315 xnn_operator_t multiply_op,
2316 size_t num_input1_dims,
2317 const size_t* input1_shape,
2318 size_t num_input2_dims,
2319 const size_t* input2_shape,
2320 const int8_t* input1,
2321 const int8_t* input2,
2322 int8_t* output,
2323 pthreadpool_t threadpool);
2324
Marat Dukhan71a9bb12021-09-09 08:54:18 -07002325enum xnn_status xnn_create_sigmoid_nc_qs8(
2326 size_t channels,
2327 size_t input_stride,
2328 size_t output_stride,
2329 int8_t input_zero_point,
2330 float input_scale,
2331 int8_t output_zero_point,
2332 float output_scale,
2333 int8_t output_min,
2334 int8_t output_max,
2335 uint32_t flags,
2336 xnn_operator_t* sigmoid_op_out);
2337
2338enum xnn_status xnn_setup_sigmoid_nc_qs8(
2339 xnn_operator_t sigmoid_op,
2340 size_t batch_size,
2341 const int8_t* input,
2342 int8_t* output,
2343 pthreadpool_t threadpool);
2344
Marat Dukhan8e2fd202021-09-07 18:42:01 -07002345enum xnn_status xnn_create_subtract_nd_qs8(
2346 int8_t input1_zero_point,
2347 float input1_scale,
2348 int8_t input2_zero_point,
2349 float input2_scale,
2350 int8_t output_zero_point,
2351 float output_scale,
2352 int8_t output_min,
2353 int8_t output_max,
2354 uint32_t flags,
2355 xnn_operator_t* subtract_op_out);
2356
2357enum xnn_status xnn_setup_subtract_nd_qs8(
2358 xnn_operator_t subtract_op,
2359 size_t num_input1_dims,
2360 const size_t* input1_shape,
2361 size_t num_input2_dims,
2362 const size_t* input2_shape,
2363 const int8_t* input1,
2364 const int8_t* input2,
2365 int8_t* output,
2366 pthreadpool_t threadpool);
2367
Marat Dukhan5de7bc02021-09-09 19:04:01 -07002368enum xnn_status xnn_create_tanh_nc_qs8(
2369 size_t channels,
2370 size_t input_stride,
2371 size_t output_stride,
2372 int8_t input_zero_point,
2373 float input_scale,
2374 int8_t output_zero_point,
2375 float output_scale,
2376 int8_t output_min,
2377 int8_t output_max,
2378 uint32_t flags,
2379 xnn_operator_t* tanh_op_out);
2380
2381enum xnn_status xnn_setup_tanh_nc_qs8(
2382 xnn_operator_t tanh_op,
2383 size_t batch_size,
2384 const int8_t* input,
2385 int8_t* output,
2386 pthreadpool_t threadpool);
2387
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07002388#endif // XNN_NO_QS8_OPERATORS
2389
Marat Dukhan08b7a972020-07-14 18:17:29 -07002390#ifndef XNN_NO_QU8_OPERATORS
Marat Dukhand6209722019-10-07 12:54:25 -07002391
Marat Dukhandb007cd2021-07-20 23:42:39 -07002392enum xnn_status xnn_create_add_nd_qu8(
2393 uint8_t input1_zero_point,
2394 float input1_scale,
2395 uint8_t input2_zero_point,
2396 float input2_scale,
2397 uint8_t output_zero_point,
2398 float output_scale,
2399 uint8_t output_min,
2400 uint8_t output_max,
2401 uint32_t flags,
2402 xnn_operator_t* add_op_out);
2403
2404enum xnn_status xnn_setup_add_nd_qu8(
2405 xnn_operator_t add_op,
2406 size_t num_input1_dims,
2407 const size_t* input1_shape,
2408 size_t num_input2_dims,
2409 const size_t* input2_shape,
2410 const uint8_t* input1,
2411 const uint8_t* input2,
2412 uint8_t* output,
2413 pthreadpool_t threadpool);
2414
Marat Dukhan08b7a972020-07-14 18:17:29 -07002415enum xnn_status xnn_create_average_pooling2d_nhwc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002416 uint32_t input_padding_top,
2417 uint32_t input_padding_right,
2418 uint32_t input_padding_bottom,
2419 uint32_t input_padding_left,
2420 uint32_t pooling_height,
2421 uint32_t pooling_width,
2422 uint32_t stride_height,
2423 uint32_t stride_width,
2424 size_t channels,
2425 size_t input_pixel_stride,
2426 size_t output_pixel_stride,
2427 uint8_t input_zero_point,
2428 float input_scale,
2429 uint8_t output_zero_point,
2430 float output_scale,
2431 uint8_t output_min,
2432 uint8_t output_max,
2433 uint32_t flags,
2434 xnn_operator_t* average_pooling_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -07002435
Marat Dukhan08b7a972020-07-14 18:17:29 -07002436enum xnn_status xnn_setup_average_pooling2d_nhwc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002437 xnn_operator_t average_pooling_op,
2438 size_t batch_size,
2439 size_t input_height,
2440 size_t input_width,
2441 const uint8_t* input,
2442 uint8_t* output,
2443 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -07002444
Marat Dukhan08b7a972020-07-14 18:17:29 -07002445enum xnn_status xnn_create_convolution2d_nhwc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002446 uint32_t input_padding_top,
2447 uint32_t input_padding_right,
2448 uint32_t input_padding_bottom,
2449 uint32_t input_padding_left,
2450 uint32_t kernel_height,
2451 uint32_t kernel_width,
2452 uint32_t subsampling_height,
2453 uint32_t subsampling_width,
2454 uint32_t dilation_height,
2455 uint32_t dilation_width,
2456 uint32_t groups,
2457 size_t group_input_channels,
2458 size_t group_output_channels,
Marat Dukhan08b7a972020-07-14 18:17:29 -07002459 size_t input_channel_stride,
2460 size_t output_channel_stride,
Marat Dukhan03bc4072020-01-28 14:52:25 -08002461 uint8_t input_zero_point,
2462 float input_scale,
2463 uint8_t kernel_zero_point,
2464 float kernel_scale,
2465 const uint8_t* kernel,
2466 const int32_t* bias,
2467 uint8_t output_zero_point,
2468 float output_scale,
2469 uint8_t output_min,
2470 uint8_t output_max,
2471 uint32_t flags,
2472 xnn_operator_t* convolution_op_out);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002473
Marat Dukhan08b7a972020-07-14 18:17:29 -07002474enum xnn_status xnn_setup_convolution2d_nhwc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002475 xnn_operator_t convolution_op,
2476 size_t batch_size,
2477 size_t input_height,
2478 size_t input_width,
2479 const uint8_t* input,
2480 uint8_t* output,
2481 pthreadpool_t threadpool);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002482
Marat Dukhan08b7a972020-07-14 18:17:29 -07002483enum xnn_status xnn_create_deconvolution2d_nhwc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002484 uint32_t output_padding_top,
2485 uint32_t output_padding_right,
2486 uint32_t output_padding_bottom,
2487 uint32_t output_padding_left,
2488 uint32_t kernel_height,
2489 uint32_t kernel_width,
2490 uint32_t stride_height,
2491 uint32_t stride_width,
2492 uint32_t dilation_height,
2493 uint32_t dilation_width,
2494 uint32_t groups,
2495 size_t group_input_channels,
2496 size_t group_output_channels,
2497 size_t input_pixel_stride,
2498 size_t output_pixel_stride,
2499 uint8_t input_zero_point,
2500 float input_scale,
2501 uint8_t kernel_zero_point,
2502 float kernel_scale,
2503 const uint8_t* kernel,
2504 const int32_t* bias,
2505 uint8_t output_zero_point,
2506 float output_scale,
2507 uint8_t output_min,
2508 uint8_t output_max,
2509 uint32_t flags,
2510 xnn_operator_t* deconvolution_op_out);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002511
Marat Dukhan08b7a972020-07-14 18:17:29 -07002512enum xnn_status xnn_setup_deconvolution2d_nhwc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002513 xnn_operator_t deconvolution_op,
2514 size_t batch_size,
2515 size_t input_height,
2516 size_t input_width,
2517 uint32_t adjustment_height,
2518 uint32_t adjustment_width,
2519 const uint8_t* input,
2520 uint8_t* output,
2521 pthreadpool_t threadpool);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002522
Marat Dukhan08b7a972020-07-14 18:17:29 -07002523enum xnn_status xnn_create_fully_connected_nc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002524 size_t input_channels,
2525 size_t output_channels,
2526 size_t input_stride,
2527 size_t output_stride,
2528 uint8_t input_zero_point,
2529 float input_scale,
2530 uint8_t kernel_zero_point,
2531 float kernel_scale,
2532 const uint8_t* kernel,
2533 const int32_t* bias,
2534 uint8_t output_zero_point,
2535 float output_scale,
2536 uint8_t output_min,
2537 uint8_t output_max,
2538 uint32_t flags,
2539 xnn_operator_t* fully_connected_op_out);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002540
Marat Dukhan08b7a972020-07-14 18:17:29 -07002541enum xnn_status xnn_setup_fully_connected_nc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002542 xnn_operator_t fully_connected_op,
2543 size_t batch_size,
2544 const uint8_t* input,
2545 uint8_t* output,
2546 pthreadpool_t threadpool);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002547
Marat Dukhan08b7a972020-07-14 18:17:29 -07002548enum xnn_status xnn_create_global_average_pooling_nwc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002549 size_t channels,
2550 size_t input_stride,
2551 size_t output_stride,
2552 uint8_t input_zero_point,
2553 float input_scale,
2554 uint8_t output_zero_point,
2555 float output_scale,
2556 uint8_t output_min,
2557 uint8_t output_max,
2558 uint32_t flags,
2559 xnn_operator_t* global_average_pooling_op_out);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002560
Marat Dukhan08b7a972020-07-14 18:17:29 -07002561enum xnn_status xnn_setup_global_average_pooling_nwc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002562 xnn_operator_t global_average_pooling_op,
2563 size_t batch_size,
2564 size_t width,
2565 const uint8_t* input,
2566 uint8_t* output,
2567 pthreadpool_t threadpool);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002568
Marat Dukhan08b7a972020-07-14 18:17:29 -07002569enum xnn_status xnn_create_leaky_relu_nc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002570 size_t channels,
2571 size_t input_stride,
2572 size_t output_stride,
2573 float negative_slope,
2574 uint8_t input_zero_point,
2575 float input_scale,
2576 uint8_t output_zero_point,
2577 float output_scale,
2578 uint8_t output_min,
2579 uint8_t output_max,
2580 uint32_t flags,
2581 xnn_operator_t* leaky_relu_op_out);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002582
Marat Dukhan08b7a972020-07-14 18:17:29 -07002583enum xnn_status xnn_setup_leaky_relu_nc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002584 xnn_operator_t leaky_relu_op,
2585 size_t batch_size,
2586 const uint8_t* input,
2587 uint8_t* output,
2588 pthreadpool_t threadpool);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002589
Marat Dukhan0853b8a2021-08-03 01:01:53 -07002590enum xnn_status xnn_create_multiply_nd_qu8(
2591 uint8_t input1_zero_point,
2592 float input1_scale,
2593 uint8_t input2_zero_point,
2594 float input2_scale,
2595 uint8_t output_zero_point,
2596 float output_scale,
2597 uint8_t output_min,
2598 uint8_t output_max,
2599 uint32_t flags,
2600 xnn_operator_t* multiply_op_out);
2601
2602enum xnn_status xnn_setup_multiply_nd_qu8(
2603 xnn_operator_t multiply_op,
2604 size_t num_input1_dims,
2605 const size_t* input1_shape,
2606 size_t num_input2_dims,
2607 const size_t* input2_shape,
2608 const uint8_t* input1,
2609 const uint8_t* input2,
2610 uint8_t* output,
2611 pthreadpool_t threadpool);
2612
Marat Dukhan08b7a972020-07-14 18:17:29 -07002613enum xnn_status xnn_create_sigmoid_nc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002614 size_t channels,
2615 size_t input_stride,
2616 size_t output_stride,
2617 uint8_t input_zero_point,
2618 float input_scale,
2619 uint8_t output_zero_point,
2620 float output_scale,
2621 uint8_t output_min,
2622 uint8_t output_max,
2623 uint32_t flags,
2624 xnn_operator_t* sigmoid_op_out);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002625
Marat Dukhan08b7a972020-07-14 18:17:29 -07002626enum xnn_status xnn_setup_sigmoid_nc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002627 xnn_operator_t sigmoid_op,
2628 size_t batch_size,
2629 const uint8_t* input,
2630 uint8_t* output,
2631 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -07002632
Marat Dukhan08b7a972020-07-14 18:17:29 -07002633enum xnn_status xnn_create_softmax_nc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002634 size_t channels,
2635 size_t input_stride,
2636 size_t output_stride,
2637 float input_scale,
2638 uint8_t output_zero_point,
2639 float output_scale,
2640 uint32_t flags,
2641 xnn_operator_t* softmax_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -07002642
Marat Dukhan08b7a972020-07-14 18:17:29 -07002643enum xnn_status xnn_setup_softmax_nc_qu8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002644 xnn_operator_t softmax_op,
2645 size_t batch_size,
2646 const uint8_t* input,
2647 uint8_t* output,
2648 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -07002649
Marat Dukhan8e2fd202021-09-07 18:42:01 -07002650enum xnn_status xnn_create_subtract_nd_qu8(
2651 uint8_t input1_zero_point,
2652 float input1_scale,
2653 uint8_t input2_zero_point,
2654 float input2_scale,
2655 uint8_t output_zero_point,
2656 float output_scale,
2657 uint8_t output_min,
2658 uint8_t output_max,
2659 uint32_t flags,
2660 xnn_operator_t* subtract_op_out);
2661
2662enum xnn_status xnn_setup_subtract_nd_qu8(
2663 xnn_operator_t subtract_op,
2664 size_t num_input1_dims,
2665 const size_t* input1_shape,
2666 size_t num_input2_dims,
2667 const size_t* input2_shape,
2668 const uint8_t* input1,
2669 const uint8_t* input2,
2670 uint8_t* output,
2671 pthreadpool_t threadpool);
2672
Marat Dukhan5de7bc02021-09-09 19:04:01 -07002673enum xnn_status xnn_create_tanh_nc_qu8(
2674 size_t channels,
2675 size_t input_stride,
2676 size_t output_stride,
2677 uint8_t input_zero_point,
2678 float input_scale,
2679 uint8_t output_zero_point,
2680 float output_scale,
2681 uint8_t output_min,
2682 uint8_t output_max,
2683 uint32_t flags,
2684 xnn_operator_t* tanh_op_out);
2685
2686enum xnn_status xnn_setup_tanh_nc_qu8(
2687 xnn_operator_t tanh_op,
2688 size_t batch_size,
2689 const uint8_t* input,
2690 uint8_t* output,
2691 pthreadpool_t threadpool);
2692
Marat Dukhan08b7a972020-07-14 18:17:29 -07002693#endif // XNN_NO_QU8_OPERATORS
Marat Dukhand6209722019-10-07 12:54:25 -07002694
Marat Dukhan94912792021-08-16 21:40:30 -07002695#ifndef XNN_NO_S8_OPERATORS
Marat Dukhand6209722019-10-07 12:54:25 -07002696
Marat Dukhan61c0c9e2021-08-16 23:16:14 -07002697enum xnn_status xnn_create_clamp_nc_s8(
2698 size_t channels,
2699 size_t input_stride,
2700 size_t output_stride,
2701 int8_t output_min,
2702 int8_t output_max,
2703 uint32_t flags,
2704 xnn_operator_t* clamp_op_out);
2705
2706enum xnn_status xnn_setup_clamp_nc_s8(
2707 xnn_operator_t clamp_op,
2708 size_t batch_size,
2709 const int8_t* input,
2710 int8_t* output,
2711 pthreadpool_t threadpool);
2712
Marat Dukhandc5c1482021-08-16 09:03:15 -07002713enum xnn_status xnn_create_max_pooling2d_nhwc_s8(
2714 uint32_t input_padding_top,
2715 uint32_t input_padding_right,
2716 uint32_t input_padding_bottom,
2717 uint32_t input_padding_left,
2718 uint32_t pooling_height,
2719 uint32_t pooling_width,
2720 uint32_t stride_height,
2721 uint32_t stride_width,
2722 uint32_t dilation_height,
2723 uint32_t dilation_width,
2724 size_t channels,
2725 size_t input_pixel_stride,
2726 size_t output_pixel_stride,
2727 int8_t output_min,
2728 int8_t output_max,
2729 uint32_t flags,
2730 xnn_operator_t* max_pooling_op_out);
2731
2732enum xnn_status xnn_setup_max_pooling2d_nhwc_s8(
2733 xnn_operator_t max_pooling_op,
2734 size_t batch_size,
2735 size_t input_height,
2736 size_t input_width,
2737 const int8_t* input,
2738 int8_t* output,
2739 pthreadpool_t threadpool);
2740
Marat Dukhan0ab75532021-11-24 16:50:30 -08002741enum xnn_status xnn_create_resize_bilinear2d_nhwc_s8(
2742 size_t channels,
2743 size_t input_pixel_stride,
2744 size_t output_pixel_stride,
2745 uint32_t flags,
2746 xnn_operator_t* resize_op_out);
2747
2748enum xnn_status xnn_setup_resize_bilinear2d_nhwc_s8(
2749 xnn_operator_t resize_op,
2750 size_t batch_size,
2751 size_t input_height,
2752 size_t input_width,
2753 size_t output_height,
2754 size_t output_width,
2755 const int8_t* input,
2756 int8_t* output,
2757 pthreadpool_t threadpool);
2758
Marat Dukhan94912792021-08-16 21:40:30 -07002759#endif // XNN_NO_S8_OPERATORS
Marat Dukhandc5c1482021-08-16 09:03:15 -07002760
2761#ifndef XNN_NO_U8_OPERATORS
2762
Marat Dukhand6209722019-10-07 12:54:25 -07002763enum xnn_status xnn_create_clamp_nc_u8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002764 size_t channels,
2765 size_t input_stride,
2766 size_t output_stride,
2767 uint8_t output_min,
2768 uint8_t output_max,
2769 uint32_t flags,
2770 xnn_operator_t* clamp_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -07002771
2772enum xnn_status xnn_setup_clamp_nc_u8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08002773 xnn_operator_t clamp_op,
2774 size_t batch_size,
2775 const uint8_t* input,
2776 uint8_t* output,
2777 pthreadpool_t threadpool);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002778
/// Create a Max Pooling (2D, NHWC layout, U8 datatype) operator.
///
/// @param input_padding_top - implicit padding above the input image, in pixels.
/// @param input_padding_right - implicit padding to the right of the input image, in pixels.
/// @param input_padding_bottom - implicit padding below the input image, in pixels.
/// @param input_padding_left - implicit padding to the left of the input image, in pixels.
/// @param pooling_height - height of the pooling window, in pixels.
/// @param pooling_width - width of the pooling window, in pixels.
/// @param stride_height - vertical displacement between adjacent pooling windows, in pixels.
/// @param stride_width - horizontal displacement between adjacent pooling windows, in pixels.
/// @param dilation_height - vertical dilation of pooling window elements.
/// @param dilation_width - horizontal dilation of pooling window elements.
/// @param channels - number of channels per pixel.
/// @param input_pixel_stride - stride, in elements, between adjacent pixels of the input tensor.
/// @param output_pixel_stride - stride, in elements, between adjacent pixels of the output tensor.
/// @param output_min - lower bound for clamping output values.
/// @param output_max - upper bound for clamping output values.
/// @param flags - binary features of the operator (combination of XNN_FLAG_* values, or 0).
/// @param max_pooling_op_out - on success, receives a handle to the created operator object.
enum xnn_status xnn_create_max_pooling2d_nhwc_u8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* max_pooling_op_out);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002797
/// Setup a Max Pooling (2D, NHWC layout, U8 datatype) operator previously created with
/// xnn_create_max_pooling2d_nhwc_u8 for the given input dimensions and tensor pointers.
///
/// @param max_pooling_op - the Max Pooling operator object to set up.
/// @param batch_size - number of images in the input batch.
/// @param input_height - height of the input tensor, in pixels.
/// @param input_width - width of the input tensor, in pixels.
/// @param input - pointer to the input tensor with unsigned 8-bit elements.
/// @param output - pointer to the output tensor with unsigned 8-bit elements.
/// @param threadpool - thread pool used to parallelize the computation.
enum xnn_status xnn_setup_max_pooling2d_nhwc_u8(
  xnn_operator_t max_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002806
/// Create a Resize Bilinear (2D, NHWC layout, U8 datatype) operator.
///
/// @param channels - number of channels per pixel.
/// @param input_pixel_stride - stride, in elements, between adjacent pixels of the input tensor.
/// @param output_pixel_stride - stride, in elements, between adjacent pixels of the output tensor.
/// @param flags - binary features of the operator (combination of XNN_FLAG_* values, or 0).
/// @param resize_op_out - on success, receives a handle to the created operator object.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_u8(
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* resize_op_out);
2813
/// Setup a Resize Bilinear (2D, NHWC layout, U8 datatype) operator previously created with
/// xnn_create_resize_bilinear2d_nhwc_u8 for the given input/output dimensions and tensor pointers.
///
/// @param resize_op - the Resize Bilinear operator object to set up.
/// @param batch_size - number of images in the input batch.
/// @param input_height - height of the input tensor, in pixels.
/// @param input_width - width of the input tensor, in pixels.
/// @param output_height - height of the output tensor, in pixels.
/// @param output_width - width of the output tensor, in pixels.
/// @param input - pointer to the input tensor with unsigned 8-bit elements.
/// @param output - pointer to the output tensor with unsigned 8-bit elements.
/// @param threadpool - thread pool used to parallelize the computation.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_u8(
  xnn_operator_t resize_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);
2824
Marat Dukhand6209722019-10-07 12:54:25 -07002825#endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07002826
Marat Dukhand6209722019-10-07 12:54:25 -07002827#ifndef XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07002828
/// Create a Copy (NC layout, X8 datatype) operator; X8 denotes 8-bit elements whose
/// interpretation (signed/unsigned) does not affect the operation.
///
/// @param channels - number of channels per element.
/// @param input_stride - stride, in elements, between consecutive batch items of the input tensor.
/// @param output_stride - stride, in elements, between consecutive batch items of the output tensor.
/// @param flags - binary features of the operator (combination of XNN_FLAG_* values, or 0).
/// @param copy_op_out - on success, receives a handle to the created operator object.
enum xnn_status xnn_create_copy_nc_x8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* copy_op_out);
2835
/// Setup a Copy (NC layout, X8 datatype) operator previously created with
/// xnn_create_copy_nc_x8 for the given batch size and tensor pointers.
///
/// @param copy_op - the Copy operator object to set up.
/// @param batch_size - number of elements in the batch.
/// @param input - pointer to the input tensor (8-bit elements).
/// @param output - pointer to the output tensor (8-bit elements).
/// @param threadpool - thread pool used to parallelize the computation.
enum xnn_status xnn_setup_copy_nc_x8(
  xnn_operator_t copy_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);
2842
/// Create a Channel Shuffle (NC layout, X8 datatype) operator that interleaves channels
/// across the given number of groups.
///
/// @param groups - number of channel groups to shuffle across.
/// @param group_channels - number of channels within each group.
/// @param input_stride - stride, in elements, between consecutive batch items of the input tensor.
/// @param output_stride - stride, in elements, between consecutive batch items of the output tensor.
/// @param flags - binary features of the operator (combination of XNN_FLAG_* values, or 0).
/// @param channel_shuffle_op_out - on success, receives a handle to the created operator object.
enum xnn_status xnn_create_channel_shuffle_nc_x8(
  size_t groups,
  size_t group_channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* channel_shuffle_op_out);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002850
/// Setup a Channel Shuffle (NC layout, X8 datatype) operator previously created with
/// xnn_create_channel_shuffle_nc_x8 for the given batch size and tensor pointers.
///
/// @param channel_shuffle_op - the Channel Shuffle operator object to set up.
/// @param batch_size - number of elements in the batch.
/// @param input - pointer to the input tensor (8-bit elements).
/// @param output - pointer to the output tensor (8-bit elements).
/// @param threadpool - thread pool used to parallelize the computation.
enum xnn_status xnn_setup_channel_shuffle_nc_x8(
  xnn_operator_t channel_shuffle_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002857
/// Create a Constant Pad (N-dimensional, X8 datatype) operator that pads a tensor with a
/// constant value. Padding amounts and shape are supplied at setup time.
///
/// @param padding_value - pointer to the 8-bit value used to fill padded regions.
/// @param flags - binary features of the operator (combination of XNN_FLAG_* values, or 0).
/// @param constant_pad_op_out - on success, receives a handle to the created operator object.
enum xnn_status xnn_create_constant_pad_nd_x8(
  const void* padding_value,
  uint32_t flags,
  xnn_operator_t* constant_pad_op_out);
2862
/// Setup a Constant Pad (N-dimensional, X8 datatype) operator previously created with
/// xnn_create_constant_pad_nd_x8 for the given shape, padding amounts, and tensor pointers.
///
/// @param constant_pad_op - the Constant Pad operator object to set up.
/// @param num_dims - number of dimensions in the input shape and padding arrays.
/// @param input_shape - array of num_dims input tensor dimensions.
/// @param pre_padding - array of num_dims padding amounts before the data along each dimension.
/// @param post_padding - array of num_dims padding amounts after the data along each dimension.
/// @param input - pointer to the input tensor (8-bit elements).
/// @param output - pointer to the output tensor (8-bit elements).
/// @param threadpool - thread pool used to parallelize the computation.
enum xnn_status xnn_setup_constant_pad_nd_x8(
  xnn_operator_t constant_pad_op,
  size_t num_dims,
  const size_t* input_shape,
  const size_t* pre_padding,
  const size_t* post_padding,
  const void* input,
  void* output,
  pthreadpool_t threadpool);
2872
Marat Dukhand6209722019-10-07 12:54:25 -07002873#endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07002874
Marat Dukhanaf2ba002021-10-24 14:21:41 -07002875#ifndef XNN_NO_CVT_OPERATORS
2876
/// Create a Convert (NC layout, F16 input, F32 output) operator that converts half-precision
/// elements to single-precision.
///
/// @param channels - number of channels per element.
/// @param input_stride - stride, in elements, between consecutive batch items of the input tensor.
/// @param output_stride - stride, in elements, between consecutive batch items of the output tensor.
/// @param flags - binary features of the operator (combination of XNN_FLAG_* values, or 0).
/// @param convert_op_out - on success, receives a handle to the created operator object.
enum xnn_status xnn_create_convert_nc_f16_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* convert_op_out);
2883
/// Setup a Convert (NC layout, F16 -> F32) operator previously created with
/// xnn_create_convert_nc_f16_f32 for the given batch size and tensor pointers.
///
/// @param convert_op - the Convert operator object to set up.
/// @param batch_size - number of elements in the batch.
/// @param input - pointer to the input tensor with half-precision (16-bit) elements.
/// @param output - pointer to the output tensor with single-precision (float) elements.
/// @param threadpool - thread pool used to parallelize the computation.
enum xnn_status xnn_setup_convert_nc_f16_f32(
  xnn_operator_t convert_op,
  size_t batch_size,
  const void* input,
  float* output,
  pthreadpool_t threadpool);
2890
/// Create a Convert (NC layout, F32 input, F16 output) operator that converts single-precision
/// elements to half-precision.
///
/// @param channels - number of channels per element.
/// @param input_stride - stride, in elements, between consecutive batch items of the input tensor.
/// @param output_stride - stride, in elements, between consecutive batch items of the output tensor.
/// @param flags - binary features of the operator (combination of XNN_FLAG_* values, or 0).
/// @param convert_op_out - on success, receives a handle to the created operator object.
enum xnn_status xnn_create_convert_nc_f32_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* convert_op_out);
2897
/// Setup a Convert (NC layout, F32 -> F16) operator previously created with
/// xnn_create_convert_nc_f32_f16 for the given batch size and tensor pointers.
///
/// @param convert_op - the Convert operator object to set up.
/// @param batch_size - number of elements in the batch.
/// @param input - pointer to the input tensor with single-precision (float) elements.
/// @param output - pointer to the output tensor with half-precision (16-bit) elements.
/// @param threadpool - thread pool used to parallelize the computation.
enum xnn_status xnn_setup_convert_nc_f32_f16(
  xnn_operator_t convert_op,
  size_t batch_size,
  const float* input,
  void* output,
  pthreadpool_t threadpool);
2904
/// Create a Convert (NC layout, F32 input, QS8 output) operator that quantizes
/// single-precision elements to signed 8-bit with the given scale and zero point.
///
/// @param channels - number of channels per element.
/// @param input_stride - stride, in elements, between consecutive batch items of the input tensor.
/// @param output_stride - stride, in elements, between consecutive batch items of the output tensor.
/// @param output_scale - quantization scale of the output tensor.
/// @param output_zero_point - quantization zero point of the output tensor.
/// @param output_min - lower bound for clamping quantized output values.
/// @param output_max - upper bound for clamping quantized output values.
/// @param flags - binary features of the operator (combination of XNN_FLAG_* values, or 0).
/// @param convert_op_out - on success, receives a handle to the created operator object.
enum xnn_status xnn_create_convert_nc_f32_qs8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float output_scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* convert_op_out);
2915
/// Setup a Convert (NC layout, F32 -> QS8) operator previously created with
/// xnn_create_convert_nc_f32_qs8 for the given batch size and tensor pointers.
///
/// @param convert_op - the Convert operator object to set up.
/// @param batch_size - number of elements in the batch.
/// @param input - pointer to the input tensor with single-precision (float) elements.
/// @param output - pointer to the output tensor with signed 8-bit quantized elements.
/// @param threadpool - thread pool used to parallelize the computation.
enum xnn_status xnn_setup_convert_nc_f32_qs8(
  xnn_operator_t convert_op,
  size_t batch_size,
  const float* input,
  int8_t* output,
  pthreadpool_t threadpool);
2922
/// Create a Convert (NC layout, F32 input, QU8 output) operator that quantizes
/// single-precision elements to unsigned 8-bit with the given scale and zero point.
///
/// @param channels - number of channels per element.
/// @param input_stride - stride, in elements, between consecutive batch items of the input tensor.
/// @param output_stride - stride, in elements, between consecutive batch items of the output tensor.
/// @param output_scale - quantization scale of the output tensor.
/// @param output_zero_point - quantization zero point of the output tensor.
/// @param output_min - lower bound for clamping quantized output values.
/// @param output_max - upper bound for clamping quantized output values.
/// @param flags - binary features of the operator (combination of XNN_FLAG_* values, or 0).
/// @param convert_op_out - on success, receives a handle to the created operator object.
enum xnn_status xnn_create_convert_nc_f32_qu8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float output_scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* convert_op_out);
2933
/// Setup a Convert (NC layout, F32 -> QU8) operator previously created with
/// xnn_create_convert_nc_f32_qu8 for the given batch size and tensor pointers.
///
/// @param convert_op - the Convert operator object to set up.
/// @param batch_size - number of elements in the batch.
/// @param input - pointer to the input tensor with single-precision (float) elements.
/// @param output - pointer to the output tensor with unsigned 8-bit quantized elements.
/// @param threadpool - thread pool used to parallelize the computation.
enum xnn_status xnn_setup_convert_nc_f32_qu8(
  xnn_operator_t convert_op,
  size_t batch_size,
  const float* input,
  uint8_t* output,
  pthreadpool_t threadpool);
2940
/// Create a Convert (NC layout, QS8 input, F32 output) operator that dequantizes
/// signed 8-bit elements to single-precision using the given scale and zero point.
///
/// @param channels - number of channels per element.
/// @param input_stride - stride, in elements, between consecutive batch items of the input tensor.
/// @param output_stride - stride, in elements, between consecutive batch items of the output tensor.
/// @param input_scale - quantization scale of the input tensor.
/// @param input_zero_point - quantization zero point of the input tensor.
/// @param flags - binary features of the operator (combination of XNN_FLAG_* values, or 0).
/// @param convert_op_out - on success, receives a handle to the created operator object.
enum xnn_status xnn_create_convert_nc_qs8_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float input_scale,
  int8_t input_zero_point,
  uint32_t flags,
  xnn_operator_t* convert_op_out);
2949
/// Setup a Convert (NC layout, QS8 -> F32) operator previously created with
/// xnn_create_convert_nc_qs8_f32 for the given batch size and tensor pointers.
///
/// @param convert_op - the Convert operator object to set up.
/// @param batch_size - number of elements in the batch.
/// @param input - pointer to the input tensor with signed 8-bit quantized elements.
/// @param output - pointer to the output tensor with single-precision (float) elements.
/// @param threadpool - thread pool used to parallelize the computation.
enum xnn_status xnn_setup_convert_nc_qs8_f32(
  xnn_operator_t convert_op,
  size_t batch_size,
  const int8_t* input,
  float* output,
  pthreadpool_t threadpool);
2956
/// Create a Convert (NC layout, QU8 input, F32 output) operator that dequantizes
/// unsigned 8-bit elements to single-precision using the given scale and zero point.
///
/// @param channels - number of channels per element.
/// @param input_stride - stride, in elements, between consecutive batch items of the input tensor.
/// @param output_stride - stride, in elements, between consecutive batch items of the output tensor.
/// @param input_scale - quantization scale of the input tensor.
/// @param input_zero_point - quantization zero point of the input tensor.
/// @param flags - binary features of the operator (combination of XNN_FLAG_* values, or 0).
/// @param convert_op_out - on success, receives a handle to the created operator object.
enum xnn_status xnn_create_convert_nc_qu8_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float input_scale,
  uint8_t input_zero_point,
  uint32_t flags,
  xnn_operator_t* convert_op_out);
2965
/// Setup a Convert (NC layout, QU8 -> F32) operator previously created with
/// xnn_create_convert_nc_qu8_f32 for the given batch size and tensor pointers.
///
/// @param convert_op - the Convert operator object to set up.
/// @param batch_size - number of elements in the batch.
/// @param input - pointer to the input tensor with unsigned 8-bit quantized elements.
/// @param output - pointer to the output tensor with single-precision (float) elements.
/// @param threadpool - thread pool used to parallelize the computation.
enum xnn_status xnn_setup_convert_nc_qu8_f32(
  xnn_operator_t convert_op,
  size_t batch_size,
  const uint8_t* input,
  float* output,
  pthreadpool_t threadpool);
2972
Marat Dukhanaf2ba002021-10-24 14:21:41 -07002973#endif // XNN_NO_CVT_OPERATORS
2974
XNNPACK Teamb455b122019-09-27 18:10:33 -07002975#ifdef __cplusplus
2976} // extern "C"
2977#endif