blob: 221cdf57339ad229285c2afbc84a24ef258cc910 [file] [log] [blame]
XNNPACK Teamb455b122019-09-27 18:10:33 -07001// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2019 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8
9#pragma once
10
11#include <stdbool.h>
12#include <stddef.h>
13#include <stdint.h>
14
15#include <pthreadpool.h>
16
17#ifdef __cplusplus
18extern "C" {
19#endif
20
Marat Dukhan5609a082019-10-07 10:56:58 -070021/// The number of bytes XNNPACK may read beyond array bounds.
22/// The caller must allocate at least this many extra bytes after the tensor data passed to XNNPACK.
23///
24/// Note: XNNPACK reads, but never writes beyond array bounds.
XNNPACK Teamb455b122019-09-27 18:10:33 -070025#define XNN_EXTRA_BYTES 16
26
Marat Dukhanca2733c2019-11-15 23:21:17 -080027/// Maximum number of dimensions in tensor shape.
Marat Dukhanfc2b96e2019-12-03 12:04:04 -080028#define XNN_MAX_TENSOR_DIMS 6
Marat Dukhanca2733c2019-11-15 23:21:17 -080029
Marat Dukhan5609a082019-10-07 10:56:58 -070030/// The convolution operator represents a depthwise convolution, and uses HWGo layout for filters.
Marat Dukhandd69f0b2019-10-04 19:40:03 -070031#define XNN_FLAG_DEPTHWISE_CONVOLUTION 0x00000001
XNNPACK Teamb455b122019-09-27 18:10:33 -070032
Marat Dukhanc4f0ff92019-12-03 14:59:08 -080033/// Assume transposed weights in a fully connected operator.
34#define XNN_FLAG_TRANSPOSE_WEIGHTS 0x00000001
35
Marat Dukhan5609a082019-10-07 10:56:58 -070036/// The operator assumes NHWC layout for the input, regardless of the output layout.
XNNPACK Teamb455b122019-09-27 18:10:33 -070037#define XNN_FLAG_INPUT_NHWC 0x00000002
38
Marat Dukhan8440fde2019-10-24 12:46:13 -070039/// Match "SAME" padding in TensorFlow. Exact padding values are computed dynamically depending on input size.
40#define XNN_FLAG_TENSORFLOW_SAME_PADDING 0x00000004
41
Marat Dukhan69722492019-11-11 19:55:50 -080042/// Match behaviour of TensorFlow 1.x.
43#define XNN_FLAG_TENSORFLOW_LEGACY_MODE 0x00000004
44
45/// Align corners of input and output images in resize operations.
46#define XNN_FLAG_ALIGN_CORNERS 0x00000008
47
Marat Dukhan5609a082019-10-07 10:56:58 -070048/// Status code for any XNNPACK function call.
XNNPACK Teamb455b122019-09-27 18:10:33 -070049enum xnn_status {
Marat Dukhan5609a082019-10-07 10:56:58 -070050 /// The call succeeded, and all output arguments now contain valid data.
XNNPACK Teamb455b122019-09-27 18:10:33 -070051 xnn_status_success = 0,
  /// NOTE(review): presumably returned when XNNPACK is used before a successful xnn_initialize call - confirm.
52 xnn_status_uninitialized = 1,
  /// NOTE(review): presumably an argument passed to the call was invalid - confirm against the implementation.
53 xnn_status_invalid_parameter = 2,
  /// NOTE(review): presumably the object was not in a state that permits the call - confirm.
54 xnn_status_invalid_state = 3,
  /// NOTE(review): presumably the argument is valid, but not supported by this implementation - confirm.
55 xnn_status_unsupported_parameter = 4,
  /// Reported by xnn_initialize when the host processor does not satisfy the minimum hardware requirements
  /// for XNNPACK.
56 xnn_status_unsupported_hardware = 5,
  /// Reported by xnn_initialize when initialization fails due to an out-of-memory condition.
57 xnn_status_out_of_memory = 6,
58};
59
Marat Dukhan04f03be2019-11-19 12:36:47 -080060struct xnn_allocator {
61 /// User-specified pointer that will be passed as-is to all functions in this structure.
62 void* context;
63 /// Pointer to a function to be called for general memory allocation.
64 ///
65 /// @param context - The user-specified pointer from xnn_allocator structure.
66 /// @param size - The size of the memory block to allocate, in bytes.
67 ///
68 /// @returns Pointer to the allocated memory block of at least @ref size bytes.
69 /// If allocation fails, the function must return NULL.
70 void* (*allocate)(void* context, size_t size);
71 /// Pointer to a function to be called for general memory re-allocation, i.e. to increase or shrink a previously
72 /// allocated memory block. The content of the old memory block is copied to the new memory block.
73 ///
74 /// @param context - The user-specified pointer from xnn_allocator structure.
75 /// @param pointer - Pointer to a memory block allocated by @ref allocate or @ref reallocate functions. Can be NULL.
76 /// If the pointer is NULL, the @ref reallocate call is equivalent to an @ref allocate call.
77 /// @param size - The new size of the memory block to allocate, in bytes.
78 ///
79 /// @returns Pointer to the newly allocated memory block of at least @ref size bytes with the content of the previous
80 /// memory block.
81 /// If allocation fails, the function must return NULL, but must not release the previous memory block.
82 void* (*reallocate)(void* context, void* pointer, size_t size);
83 /// Pointer to a function to be called for general memory de-allocation.
84 ///
85 /// @param context - The user-specified pointer from xnn_allocator structure.
86 /// @param pointer - Pointer to a memory block allocated by @ref allocate or @ref reallocate functions. Can be NULL.
87 /// If the pointer is NULL, the @ref deallocate call is a no-op.
88 void (*deallocate)(void* context, void* pointer);
89 /// Pointer to a function to be called for aligned memory allocation.
90 ///
91 /// @param context - The user-specified pointer from xnn_allocator structure.
92 /// @param alignment - The alignment of the memory block to allocate, in bytes. Alignment is always a power-of-2.
93 /// @param size - The size of the memory block to allocate, in bytes.
94 ///
95 /// @returns Pointer to the allocated memory block of at least @ref size bytes.
96 /// If allocation fails, the function must return NULL.
97 void* (*aligned_allocate)(void* context, size_t alignment, size_t size);
98 /// Pointer to a function to be called for aligned memory de-allocation.
99 ///
100 /// @param context - The user-specified pointer from xnn_allocator structure.
101 /// @param pointer - Pointer to a memory block allocated by @ref aligned_allocate function. Can be NULL.
102 /// If the pointer is NULL, the @ref aligned_deallocate call is a no-op.
103 void (*aligned_deallocate)(void* context, void* pointer);
104};
105
Marat Dukhan5609a082019-10-07 10:56:58 -0700106/// Initialize XNNPACK library.
107///
108/// XNNPACK must be successfully initialized before use.
109/// During initialization, XNNPACK populates internal structures depending on host processor. It can be time-consuming.
110///
Marat Dukhan04f03be2019-11-19 12:36:47 -0800111/// @param[in] allocator - structure with function pointers to be used for memory allocation and de-allocation.
112/// If this argument is NULL, system-provided memory management functions (e.g. malloc/free)
113/// will be used.
114///
Marat Dukhan5609a082019-10-07 10:56:58 -0700115/// @retval xnn_status_success - XNNPACK is successfully initialized and ready to use.
116/// @retval xnn_status_out_of_memory - initialization failed due to out-of-memory condition.
117/// @retval xnn_status_unsupported_hardware - initialization failed because the host processor does not satisfy the
118/// minimum hardware requirements for XNNPACK. E.g. this may happen on x86
119/// processors without SSE2 extension, or on 32-bit ARM processors without
120/// the NEON SIMD extension.
Marat Dukhan04f03be2019-11-19 12:36:47 -0800121enum xnn_status xnn_initialize(const struct xnn_allocator* allocator);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700122
Marat Dukhan5609a082019-10-07 10:56:58 -0700123/// Deinitialize XNNPACK library.
124///
125/// To avoid memory and resource leaks, users must call xnn_deinitialize once for each successful xnn_initialize call.
126///
127/// @retval xnn_status_success - deinitialization call succeeded.
XNNPACK Teamb455b122019-09-27 18:10:33 -0700128enum xnn_status xnn_deinitialize(void);
129
Marat Dukhandd2b5882020-02-06 15:12:26 -0800130/// Subgraph is an abstract representation of a neural network model.
131/// Subgraph objects are used to define Values (tensors) and Nodes (operators) comprising the model.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800132typedef struct xnn_subgraph* xnn_subgraph_t;
133
Marat Dukhandd2b5882020-02-06 15:12:26 -0800134/// Create an empty Subgraph object.
135///
136/// @param external_value_ids - number of Value IDs to reserve for communication with external graph representation.
137/// The Subgraph object would avoid creating internal Value IDs in the
138/// [0, external_value_ids-1] range.
139/// @param flags - binary features of the subgraph. No supported flags are currently defined.
140/// @param subgraph_out - pointer to the variable that will be initialized with a handle to the Subgraph object upon
141/// successful return.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800142enum xnn_status xnn_create_subgraph(
143 uint32_t external_value_ids,
144 uint32_t flags,
145 xnn_subgraph_t* subgraph_out);
146
Marat Dukhandd2b5882020-02-06 15:12:26 -0800147/// Destroy a Subgraph object, as well as Values, and Nodes associated with the subgraph.
148///
149/// @param subgraph - the Subgraph object to destroy.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800150enum xnn_status xnn_delete_subgraph(
151 xnn_subgraph_t subgraph);
152
/// Flag accepted in the flags argument of xnn_define_tensor_value.
/// NOTE(review): presumably marks the Value as an external input of the Subgraph - confirm.
153#define XNN_VALUE_FLAG_EXTERNAL_INPUT 0x00000001
/// Flag accepted in the flags argument of xnn_define_tensor_value; can be combined with
/// XNN_VALUE_FLAG_EXTERNAL_INPUT.
/// NOTE(review): presumably marks the Value as an external output of the Subgraph - confirm.
154#define XNN_VALUE_FLAG_EXTERNAL_OUTPUT 0x00000002
155
/// Special Value ID. When passed as external_id to xnn_define_tensor_value, an internal ID is created for the Value.
156#define XNN_INVALID_VALUE_ID UINT32_MAX
157
Marat Dukhandd2b5882020-02-06 15:12:26 -0800158/// Type of elements in a Value object.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800159enum xnn_datatype {
Marat Dukhandd2b5882020-02-06 15:12:26 -0800160 /// Invalid data type. Valid Values never have this datatype.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800161 xnn_datatype_invalid = 0,
Marat Dukhandd2b5882020-02-06 15:12:26 -0800162 /// IEEE754 single-precision floating-point.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800163 xnn_datatype_fp32 = 1,
Marat Dukhandd2b5882020-02-06 15:12:26 -0800164 /// IEEE754 half-precision floating-point.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800165 xnn_datatype_fp16 = 2,
166};
167
Marat Dukhandd2b5882020-02-06 15:12:26 -0800168/// Define a tensor-type Value and add it to a Subgraph.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800169///
Marat Dukhandd2b5882020-02-06 15:12:26 -0800170/// @param subgraph - a Subgraph object that will own the created Value.
171/// @param datatype - type of the tensor elements.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800172/// @param num_dims - number of dimensions in the shape.
173/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
Marat Dukhandd2b5882020-02-06 15:12:26 -0800174/// XNNPACK does not keep any pointers to this array after the function returns.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800175/// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized,
Marat Dukhandd2b5882020-02-06 15:12:26 -0800176/// this pointer must be NULL. If non-NULL, the life-time of the static data must exceed the life-time
177/// of the Subgraph object, and of any Runtime objects created from the Subgraph.
178/// @param external_id - external ID for the Value. The ID must be within the range of reserved Value IDs specified on
179/// the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
180/// created for the Value.
181/// @param flags - binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
182/// and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
183/// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a
184/// valid @a external_id was provided, the variable will be initialized with the @a external_id value.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800185enum xnn_status xnn_define_tensor_value(
186 xnn_subgraph_t subgraph,
187 enum xnn_datatype datatype,
188 size_t num_dims,
189 const size_t* dims,
190 const void* data,
191 uint32_t external_id,
192 uint32_t flags,
193 uint32_t* id_out);
194
Marat Dukhandd2b5882020-02-06 15:12:26 -0800195/// Define a 2D Convolution Node and add it to a Subgraph.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800196///
Marat Dukhandd2b5882020-02-06 15:12:26 -0800197/// @param subgraph - a Subgraph object that will own the created Node.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800198/// @param input_padding_top - implicit zero-padding above 2D input data.
199/// @param input_padding_right - implicit zero-padding to the right of 2D input data.
200/// @param input_padding_bottom - implicit zero-padding below 2D input data.
201/// @param input_padding_left - implicit zero-padding to the left of 2D input data.
202/// @param kernel_height - kernel (filter) height.
203/// @param kernel_width - kernel (filter) width.
204/// @param subsampling_height - height of subsampling region for convolution output (convolution height stride).
205/// @param subsampling_width - width of subsampling region for convolution output (convolution width stride).
206/// @param dilation_height - dilation of kernel elements along the height dimension.
207/// @param dilation_width - dilation of kernel elements along the width dimension.
208/// @param groups - number of convolution groups.
209/// @param group_input_channels - number of input channels per group.
210/// @param group_output_channels - number of output channels per group.
211/// @param output_min - lower bound for clipping output values.
212/// @param output_max - upper bound for clipping output values.
Marat Dukhandd2b5882020-02-06 15:12:26 -0800213/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
214/// with [N, IH, IW, groups * group_input_channels] dimensions.
215/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 4D tensor defined in the @a subgraph
216/// with [groups * group_output_channels, kernel_height, kernel_width, group_input_channels]
217/// dimensions.
218/// @param bias_id - Value ID for the bias tensor. The bias tensor must be a 1D tensor defined in the @a subgraph with
219/// [groups * group_output_channels] dimensions.
220/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
221/// with [N, OH, OW, groups * group_output_channels] dimensions.
222/// @param flags - binary features of the 2D Convolution Node. The only currently supported value is
223/// XNN_FLAG_TENSORFLOW_SAME_PADDING.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800224enum xnn_status xnn_define_convolution_2d(
225 xnn_subgraph_t subgraph,
226 uint32_t input_padding_top,
227 uint32_t input_padding_right,
228 uint32_t input_padding_bottom,
229 uint32_t input_padding_left,
230 uint32_t kernel_height,
231 uint32_t kernel_width,
232 uint32_t subsampling_height,
233 uint32_t subsampling_width,
234 uint32_t dilation_height,
235 uint32_t dilation_width,
236 uint32_t groups,
237 size_t group_input_channels,
238 size_t group_output_channels,
239 float output_min,
240 float output_max,
241 uint32_t input_id,
242 uint32_t filter_id,
243 uint32_t bias_id,
244 uint32_t output_id,
245 uint32_t flags);
246
Marat Dukhandd2b5882020-02-06 15:12:26 -0800247/// Define a 2D Depthwise Convolution Node and add it to a Subgraph.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800248///
Marat Dukhandd2b5882020-02-06 15:12:26 -0800249/// @param subgraph - a Subgraph object that will own the created Node.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800250/// @param input_padding_top - implicit zero-padding above 2D input data.
251/// @param input_padding_right - implicit zero-padding to the right of 2D input data.
252/// @param input_padding_bottom - implicit zero-padding below 2D input data.
253/// @param input_padding_left - implicit zero-padding to the left of 2D input data.
254/// @param kernel_height - kernel (filter) height.
255/// @param kernel_width - kernel (filter) width.
256/// @param subsampling_height - height of subsampling region for convolution output (convolution height stride).
257/// @param subsampling_width - width of subsampling region for convolution output (convolution width stride).
258/// @param dilation_height - dilation of kernel elements along the height dimension.
259/// @param dilation_width - dilation of kernel elements along the width dimension.
260/// @param depth_multiplier - ratio of output channels to input channels.
261/// @param input_channels - number of input channels.
262/// @param output_min - lower bound for clipping output values.
263/// @param output_max - upper bound for clipping output values.
Marat Dukhandd2b5882020-02-06 15:12:26 -0800264/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
265/// with [N, IH, IW, input_channels] dimensions.
266/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 4D tensor defined in the @a subgraph
267/// with [1, kernel_height, kernel_width, input_channels * depth_multiplier] dimensions.
268/// @param bias_id - Value ID for the bias tensor. The bias tensor must be a 1D tensor defined in the @a subgraph with
269/// [input_channels * depth_multiplier] dimensions.
270/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
271/// with [N, OH, OW, input_channels * depth_multiplier] dimensions.
272/// @param flags - binary features of the 2D Depthwise Convolution Node. The only currently supported value is
273/// XNN_FLAG_TENSORFLOW_SAME_PADDING.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800274enum xnn_status xnn_define_depthwise_convolution_2d(
275 xnn_subgraph_t subgraph,
276 uint32_t input_padding_top,
277 uint32_t input_padding_right,
278 uint32_t input_padding_bottom,
279 uint32_t input_padding_left,
280 uint32_t kernel_height,
281 uint32_t kernel_width,
282 uint32_t subsampling_height,
283 uint32_t subsampling_width,
284 uint32_t dilation_height,
285 uint32_t dilation_width,
286 uint32_t depth_multiplier,
287 size_t input_channels,
288 float output_min,
289 float output_max,
290 uint32_t input_id,
291 uint32_t filter_id,
292 uint32_t bias_id,
293 uint32_t output_id,
294 uint32_t flags);
295
Marat Dukhandd2b5882020-02-06 15:12:26 -0800296/// Runtime is a combination of an execution plan for subgraph Nodes and a memory manager for subgraph Values.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800297typedef struct xnn_runtime* xnn_runtime_t;
298
Marat Dukhandd2b5882020-02-06 15:12:26 -0800299/// Create a Runtime object from a subgraph.
300///
301/// @param subgraph - a Subgraph object with all Values and Nodes that would be handled by the runtime. No Values or
302/// Nodes can be added to the runtime once it is constructed.
303/// @param threadpool - the thread pool to be used for parallelisation of computations in the runtime. If the thread
304/// pool is NULL, the computation would run on the caller thread without parallelization.
305/// @param flags - binary features of the runtime. No supported flags are currently defined.
306/// @param runtime_out - pointer to the variable that will be initialized with a handle to the Runtime object upon
307/// successful return. Once constructed, the Runtime object is independent of the Subgraph object
308/// used to create it.
Marat Dukhan022c6592020-02-05 18:07:41 -0800309enum xnn_status xnn_create_runtime_v2(
310 xnn_subgraph_t subgraph,
311 pthreadpool_t threadpool,
312 uint32_t flags,
313 xnn_runtime_t* runtime_out);
314
/// Create a Runtime object from a subgraph.
///
/// NOTE(review): presumably a convenience variant of xnn_create_runtime_v2 that uses no thread pool and no flags -
/// confirm against the implementation.
///
/// @param subgraph - the Subgraph object to create the runtime from (presumably same meaning as in
///                   xnn_create_runtime_v2 - confirm).
/// @param runtime_out - pointer to the variable that receives a handle to the Runtime object (presumably same
///                      meaning as in xnn_create_runtime_v2 - confirm).
Marat Dukhandd2b5882020-02-06 15:12:26 -0800315enum xnn_status xnn_create_runtime(
316 xnn_subgraph_t subgraph,
317 xnn_runtime_t* runtime_out);
318
/// Location information for one external input or output of a Runtime object.
/// An array of these structures is passed to xnn_setup_runtime.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800319struct xnn_external_value {
  /// NOTE(review): presumably the Value ID of the external input or output being described - confirm.
320 uint32_t id;
  /// NOTE(review): presumably the pointer to the data of that external input or output - confirm.
321 void* data;
322};
323
Marat Dukhandd2b5882020-02-06 15:12:26 -0800324/// Setup data pointers for external inputs and outputs in a Runtime object.
325///
326/// @param runtime - a Runtime object created with @ref xnn_create_runtime or @ref xnn_create_runtime_v2.
327/// @param num_external_values - the number of external inputs and outputs specified in this call. This number must
328/// match the number of external inputs and outputs in the runtime, i.e. all external
329/// inputs and outputs in the runtime must be specified in one call.
330/// @param external_values - array with location information for all external inputs and outputs in the runtime.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800331enum xnn_status xnn_setup_runtime(
332 xnn_runtime_t runtime,
333 size_t num_external_values,
334 const struct xnn_external_value* external_values);
335
Marat Dukhandd2b5882020-02-06 15:12:26 -0800336/// Execute forward pass for all operators in the runtime.
337///
338/// @param runtime - the Runtime object with the execution plan to invoke.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800339enum xnn_status xnn_invoke_runtime(
340 xnn_runtime_t runtime);
341
Marat Dukhandd2b5882020-02-06 15:12:26 -0800342/// Destroy a Runtime object, as well as operators and memory associated with it.
343///
344/// @param runtime - the Runtime object to destroy.
Marat Dukhan1d75a542020-02-03 12:23:01 -0800345enum xnn_status xnn_delete_runtime(
346 xnn_runtime_t runtime);
347
XNNPACK Teamb455b122019-09-27 18:10:33 -0700348typedef struct xnn_operator* xnn_operator_t;
349
Marat Dukhand6209722019-10-07 12:54:25 -0700350enum xnn_status xnn_run_operator(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800351 xnn_operator_t op,
352 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -0700353
354enum xnn_status xnn_delete_operator(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800355 xnn_operator_t op);
Marat Dukhand6209722019-10-07 12:54:25 -0700356
357#ifndef XNN_NO_F32_OPERATORS
358
359enum xnn_status xnn_create_add_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800360 size_t channels,
361 size_t a_stride,
362 size_t b_stride,
363 size_t sum_stride,
364 float sum_min,
365 float sum_max,
366 uint32_t flags,
367 xnn_operator_t* add_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -0700368
369enum xnn_status xnn_setup_add_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800370 xnn_operator_t add_op,
371 size_t batch_size,
372 const float* a,
373 const float* b,
374 float* sum,
375 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -0700376
Marat Dukhanb1a0fc32019-12-02 19:32:02 -0800377enum xnn_status xnn_create_add_nd_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800378 float output_min,
379 float output_max,
380 uint32_t flags,
381 xnn_operator_t* add_op_out);
Marat Dukhanb1a0fc32019-12-02 19:32:02 -0800382
383enum xnn_status xnn_setup_add_nd_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800384 xnn_operator_t add_op,
385 size_t num_input1_dims,
386 const size_t* input1_shape,
387 size_t num_input2_dims,
388 const size_t* input2_shape,
389 const float* input1,
390 const float* input2,
391 float* output,
392 pthreadpool_t threadpool);
Marat Dukhanb1a0fc32019-12-02 19:32:02 -0800393
Marat Dukhand6209722019-10-07 12:54:25 -0700394enum xnn_status xnn_create_argmax_pooling2d_nhwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800395 uint32_t input_padding_top,
396 uint32_t input_padding_right,
397 uint32_t input_padding_bottom,
398 uint32_t input_padding_left,
399 uint32_t pooling_height,
400 uint32_t pooling_width,
401 size_t channels,
402 size_t input_pixel_stride,
403 size_t output_pixel_stride,
404 float output_min,
405 float output_max,
406 uint32_t flags,
407 xnn_operator_t* argmax_pooling_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -0700408
409enum xnn_status xnn_setup_argmax_pooling2d_nhwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800410 xnn_operator_t argmax_pooling_op,
411 size_t batch_size,
412 size_t input_height,
413 size_t input_width,
414 const float* input,
415 float* output,
416 uint32_t* index,
417 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -0700418
419enum xnn_status xnn_create_average_pooling2d_nhwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800420 uint32_t input_padding_top,
421 uint32_t input_padding_right,
422 uint32_t input_padding_bottom,
423 uint32_t input_padding_left,
424 uint32_t pooling_height,
425 uint32_t pooling_width,
426 uint32_t stride_height,
427 uint32_t stride_width,
428 size_t channels,
429 size_t input_pixel_stride,
430 size_t output_pixel_stride,
431 float output_min,
432 float output_max,
433 uint32_t flags,
434 xnn_operator_t* average_pooling_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -0700435
436enum xnn_status xnn_setup_average_pooling2d_nhwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800437 xnn_operator_t average_pooling_op,
438 size_t batch_size,
439 size_t input_height,
440 size_t input_width,
441 const float* input,
442 float* output,
443 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -0700444
445enum xnn_status xnn_create_clamp_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800446 size_t channels,
447 size_t input_stride,
448 size_t output_stride,
449 float output_min,
450 float output_max,
451 uint32_t flags,
452 xnn_operator_t* clamp_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -0700453
454enum xnn_status xnn_setup_clamp_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800455 xnn_operator_t clamp_op,
456 size_t batch_size,
457 const float* input,
458 float* output,
459 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -0700460
461enum xnn_status xnn_create_convolution2d_nhwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800462 uint32_t input_padding_top,
463 uint32_t input_padding_right,
464 uint32_t input_padding_bottom,
465 uint32_t input_padding_left,
466 uint32_t kernel_height,
467 uint32_t kernel_width,
468 uint32_t subsampling_height,
469 uint32_t subsampling_width,
470 uint32_t dilation_height,
471 uint32_t dilation_width,
472 uint32_t groups,
473 size_t group_input_channels,
474 size_t group_output_channels,
475 size_t input_pixel_stride,
476 size_t output_pixel_stride,
477 const float* kernel,
478 const float* bias,
479 float output_min,
480 float output_max,
481 uint32_t flags,
482 xnn_operator_t* convolution_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -0700483
484enum xnn_status xnn_setup_convolution2d_nhwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800485 xnn_operator_t convolution_op,
486 size_t batch_size,
487 size_t input_height,
488 size_t input_width,
489 const float* input,
490 float* output,
491 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -0700492
493enum xnn_status xnn_create_deconvolution2d_nhwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800494 uint32_t output_padding_top,
495 uint32_t output_padding_right,
496 uint32_t output_padding_bottom,
497 uint32_t output_padding_left,
498 uint32_t kernel_height,
499 uint32_t kernel_width,
500 uint32_t stride_height,
501 uint32_t stride_width,
502 uint32_t dilation_height,
503 uint32_t dilation_width,
504 uint32_t groups,
505 size_t group_input_channels,
506 size_t group_output_channels,
507 size_t input_pixel_stride,
508 size_t output_pixel_stride,
509 const float* kernel,
510 const float* bias,
511 float output_min,
512 float output_max,
513 uint32_t flags,
514 xnn_operator_t* deconvolution_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -0700515
516enum xnn_status xnn_setup_deconvolution2d_nhwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800517 xnn_operator_t deconvolution_op,
518 size_t batch_size,
519 size_t input_height,
520 size_t input_width,
521 uint32_t adjustment_height,
522 uint32_t adjustment_width,
523 const float* input,
524 float* output,
525 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -0700526
Marat Dukhan69180502019-12-06 15:00:31 -0800527enum xnn_status xnn_create_divide_nd_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800528 float output_min,
529 float output_max,
530 uint32_t flags,
531 xnn_operator_t* divide_op_out);
Marat Dukhan69180502019-12-06 15:00:31 -0800532
533enum xnn_status xnn_setup_divide_nd_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800534 xnn_operator_t divide_op,
535 size_t num_input1_dims,
536 const size_t* input1_shape,
537 size_t num_input2_dims,
538 const size_t* input2_shape,
539 const float* input1,
540 const float* input2,
541 float* output,
542 pthreadpool_t threadpool);
Marat Dukhan69180502019-12-06 15:00:31 -0800543
Marat Dukhand6209722019-10-07 12:54:25 -0700544enum xnn_status xnn_create_fully_connected_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800545 size_t input_channels,
546 size_t output_channels,
547 size_t input_stride,
548 size_t output_stride,
549 const float* kernel,
550 const float* bias,
551 float output_min,
552 float output_max,
553 uint32_t flags,
554 xnn_operator_t* fully_connected_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -0700555
556enum xnn_status xnn_setup_fully_connected_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800557 xnn_operator_t fully_connected_op,
558 size_t batch_size,
559 const float* input,
560 float* output,
561 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -0700562
563enum xnn_status xnn_create_global_average_pooling_nwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800564 size_t channels,
565 size_t input_stride,
566 size_t output_stride,
567 float output_min,
568 float output_max,
569 uint32_t flags,
570 xnn_operator_t* global_average_pooling_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -0700571
572enum xnn_status xnn_setup_global_average_pooling_nwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800573 xnn_operator_t global_average_pooling_op,
574 size_t batch_size,
575 size_t width,
576 const float* input,
577 float* output,
578 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -0700579
580enum xnn_status xnn_create_hardswish_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800581 size_t channels,
582 size_t input_stride,
583 size_t output_stride,
584 uint32_t flags,
585 xnn_operator_t* hardswish_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -0700586
587enum xnn_status xnn_setup_hardswish_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800588 xnn_operator_t hardswish_op,
589 size_t batch_size,
590 const float* input,
591 float* output,
592 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -0700593
594enum xnn_status xnn_create_max_pooling2d_nhwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800595 uint32_t input_padding_top,
596 uint32_t input_padding_right,
597 uint32_t input_padding_bottom,
598 uint32_t input_padding_left,
599 uint32_t pooling_height,
600 uint32_t pooling_width,
601 uint32_t stride_height,
602 uint32_t stride_width,
603 uint32_t dilation_height,
604 uint32_t dilation_width,
605 size_t channels,
606 size_t input_pixel_stride,
607 size_t output_pixel_stride,
608 float output_min,
609 float output_max,
610 uint32_t flags,
611 xnn_operator_t* max_pooling_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -0700612
613enum xnn_status xnn_setup_max_pooling2d_nhwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800614 xnn_operator_t max_pooling_op,
615 size_t batch_size,
616 size_t input_height,
617 size_t input_width,
618 const float* input,
619 float* output,
620 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -0700621
Marat Dukhan79e7f842019-12-05 14:35:50 -0800622enum xnn_status xnn_create_maximum_nd_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800623 uint32_t flags,
624 xnn_operator_t* maximum_op_out);
Marat Dukhan79e7f842019-12-05 14:35:50 -0800625
626enum xnn_status xnn_setup_maximum_nd_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800627 xnn_operator_t maximum_op,
628 size_t num_input1_dims,
629 const size_t* input1_shape,
630 size_t num_input2_dims,
631 const size_t* input2_shape,
632 const float* input1,
633 const float* input2,
634 float* output,
635 pthreadpool_t threadpool);
Marat Dukhan79e7f842019-12-05 14:35:50 -0800636
637enum xnn_status xnn_create_minimum_nd_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800638 uint32_t flags,
639 xnn_operator_t* minimum_op_out);
Marat Dukhan79e7f842019-12-05 14:35:50 -0800640
641enum xnn_status xnn_setup_minimum_nd_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800642 xnn_operator_t minimum_op,
643 size_t num_input1_dims,
644 const size_t* input1_shape,
645 size_t num_input2_dims,
646 const size_t* input2_shape,
647 const float* input1,
648 const float* input2,
649 float* output,
650 pthreadpool_t threadpool);
Marat Dukhan79e7f842019-12-05 14:35:50 -0800651
/// Prototype of the create call for the F32 N-dimensional "multiply" operator.
/// Unlike the maximum/minimum create calls it also takes `output_min` / `output_max` floats.
/// NOTE(review): presumably these clamp the result range -- confirm against the implementation.
Marat Dukhanca2733c2019-11-15 23:21:17 -0800652enum xnn_status xnn_create_multiply_nd_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800653 float output_min,
654 float output_max,
655 uint32_t flags,
656 xnn_operator_t* multiply_op_out);
Marat Dukhanca2733c2019-11-15 23:21:17 -0800657
/// Prototype of the setup call for the F32 N-dimensional "multiply" operator.
/// Takes the operator handle, a (dimension count, shape) pair for each input, the two
/// read-only inputs, the writable output and a pthreadpool_t; returns an enum xnn_status.
658enum xnn_status xnn_setup_multiply_nd_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800659 xnn_operator_t multiply_op,
660 size_t num_input1_dims,
661 const size_t* input1_shape,
662 size_t num_input2_dims,
663 const size_t* input2_shape,
664 const float* input1,
665 const float* input2,
666 float* output,
667 pthreadpool_t threadpool);
Marat Dukhanca2733c2019-11-15 23:21:17 -0800668
/// Prototype of the create call for the F32 PReLU operator (NC layout, per the name).
/// `negative_slope` is a read-only float pointer rather than a scalar.
/// NOTE(review): presumably it points to one slope per channel (`channels` entries) -- confirm.
/// NOTE(review): units of input_stride/output_stride (elements vs. bytes) are not visible here.
Marat Dukhand6209722019-10-07 12:54:25 -0700669enum xnn_status xnn_create_prelu_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800670 size_t channels,
671 size_t input_stride,
672 size_t output_stride,
673 const float* negative_slope,
674 float output_min,
675 float output_max,
676 uint32_t flags,
677 xnn_operator_t* prelu_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -0700678
/// Prototype of the setup call for the F32 PReLU operator.
/// Takes the operator handle, a batch size, a read-only `input`, a writable `output`
/// and a pthreadpool_t; returns an enum xnn_status.
679enum xnn_status xnn_setup_prelu_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800680 xnn_operator_t prelu_op,
681 size_t batch_size,
682 const float* input,
683 float* output,
684 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -0700685
/// Prototype of the create call for the F32 2D bilinear resize operator (NHWC, per the name).
/// Only channel count, pixel strides and flags are fixed at creation; spatial sizes are
/// passed to the matching setup function.
/// NOTE(review): which XNN_FLAG_* values from the top of this header apply is not visible here.
Marat Dukhan69722492019-11-11 19:55:50 -0800686enum xnn_status xnn_create_resize_bilinear2d_nhwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800687 size_t channels,
688 size_t input_pixel_stride,
689 size_t output_pixel_stride,
690 uint32_t flags,
691 xnn_operator_t* resize_op_out);
Marat Dukhan69722492019-11-11 19:55:50 -0800692
/// Prototype of the setup call for the F32 2D bilinear resize operator.
/// Both the input size (input_height/input_width) and the target size
/// (output_height/output_width) are supplied here, along with the data pointers and thread pool.
693enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800694 xnn_operator_t resize_op,
695 size_t batch_size,
696 size_t input_height,
697 size_t input_width,
698 size_t output_height,
699 size_t output_width,
700 const float* input,
701 float* output,
702 pthreadpool_t threadpool);
Marat Dukhan69722492019-11-11 19:55:50 -0800703
/// Prototype of the create call for the F32 sigmoid operator (NC layout, per the name).
/// Takes channel count, input/output strides, a flags bitmask and an `xnn_operator_t*`
/// out-parameter; there are no output_min/output_max parameters on this operator.
Marat Dukhan346a9e52019-11-15 09:06:30 -0800704enum xnn_status xnn_create_sigmoid_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800705 size_t channels,
706 size_t input_stride,
707 size_t output_stride,
708 uint32_t flags,
709 xnn_operator_t* sigmoid_op_out);
Marat Dukhan346a9e52019-11-15 09:06:30 -0800710
/// Prototype of the setup call for the F32 sigmoid operator.
/// Takes the operator handle, batch size, read-only `input`, writable `output`
/// and a pthreadpool_t; returns an enum xnn_status.
711enum xnn_status xnn_setup_sigmoid_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800712 xnn_operator_t sigmoid_op,
713 size_t batch_size,
714 const float* input,
715 float* output,
716 pthreadpool_t threadpool);
Marat Dukhan346a9e52019-11-15 09:06:30 -0800717
/// Prototype of the create call for the F32 softmax operator (NC layout, per the name).
/// Parameter list is identical in shape to xnn_create_sigmoid_nc_f32: channels, strides,
/// flags and an `xnn_operator_t*` out-parameter.
Marat Dukhanfd8e6892020-01-27 15:25:25 -0800718enum xnn_status xnn_create_softmax_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800719 size_t channels,
720 size_t input_stride,
721 size_t output_stride,
722 uint32_t flags,
723 xnn_operator_t* softmax_op_out);
Marat Dukhan1edc4542020-01-27 12:40:13 -0800724
/// Prototype of the setup call for the F32 softmax operator.
/// Takes the operator handle, batch size, read-only `input`, writable `output`
/// and a pthreadpool_t; returns an enum xnn_status.
Marat Dukhanfd8e6892020-01-27 15:25:25 -0800725enum xnn_status xnn_setup_softmax_nc_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800726 xnn_operator_t softmax_op,
727 size_t batch_size,
728 const float* input,
729 float* output,
730 pthreadpool_t threadpool);
Marat Dukhan1edc4542020-01-27 12:40:13 -0800731
/// Prototype of the create call for the F32 N-dimensional "subtract" operator.
/// Same parameter list as xnn_create_multiply_nd_f32: output_min, output_max, flags and
/// an `xnn_operator_t*` out-parameter.
Marat Dukhan05f3f6d2019-12-03 15:13:53 -0800732enum xnn_status xnn_create_subtract_nd_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800733 float output_min,
734 float output_max,
735 uint32_t flags,
736 xnn_operator_t* subtract_op_out);
Marat Dukhan05f3f6d2019-12-03 15:13:53 -0800737
/// Prototype of the setup call for the F32 N-dimensional "subtract" operator.
/// Takes a (dimension count, shape) pair per input, two read-only inputs, a writable output
/// and a thread pool.
/// NOTE(review): presumably computes input1 - input2 (operand order matters) -- confirm.
738enum xnn_status xnn_setup_subtract_nd_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800739 xnn_operator_t subtract_op,
740 size_t num_input1_dims,
741 const size_t* input1_shape,
742 size_t num_input2_dims,
743 const size_t* input2_shape,
744 const float* input1,
745 const float* input2,
746 float* output,
747 pthreadpool_t threadpool);
Marat Dukhan05f3f6d2019-12-03 15:13:53 -0800748
Marat Dukhanefc47b82019-11-18 09:25:38 -0800749#ifndef XNN_NO_NCHW_OPERATORS
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700750
/// Prototype of the create call for the F32 2D convolution operator in NCHW layout (per the name).
/// Declared inside the `#ifndef XNN_NO_NCHW_OPERATORS` guard. Takes four input paddings,
/// kernel size, subsampling, dilation, group configuration, read-only `kernel` and `bias`
/// arrays, an output range, flags and an `xnn_operator_t*` out-parameter.
/// NOTE(review): XNN_FLAG_INPUT_NHWC and XNN_FLAG_DEPTHWISE_CONVOLUTION are documented at the
/// top of this header; presumably both are meaningful in `flags` here -- confirm.
Marat Dukhanefc47b82019-11-18 09:25:38 -0800751enum xnn_status xnn_create_convolution2d_nchw_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800752 uint32_t input_padding_top,
753 uint32_t input_padding_right,
754 uint32_t input_padding_bottom,
755 uint32_t input_padding_left,
756 uint32_t kernel_height,
757 uint32_t kernel_width,
758 uint32_t subsampling_height,
759 uint32_t subsampling_width,
760 uint32_t dilation_height,
761 uint32_t dilation_width,
762 uint32_t groups,
763 size_t group_input_channels,
764 size_t group_output_channels,
765 const float* kernel,
766 const float* bias,
767 float output_min,
768 float output_max,
769 uint32_t flags,
770 xnn_operator_t* convolution_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -0700771
/// Prototype of the setup call for the F32 NCHW 2D convolution operator.
/// Unlike the NHWC Q8 convolution in this header (pixel strides given at create time),
/// this variant receives input/output *batch* strides at setup time.
/// NOTE(review): stride units (elements vs. bytes) are not visible here -- confirm.
Marat Dukhanefc47b82019-11-18 09:25:38 -0800772enum xnn_status xnn_setup_convolution2d_nchw_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800773 xnn_operator_t convolution_op,
774 size_t batch_size,
775 size_t input_batch_stride,
776 size_t output_batch_stride,
777 size_t input_height,
778 size_t input_width,
779 const float* input,
780 float* output,
781 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -0700782
/// Prototype of the create call for the F32 global average pooling operator in NCW layout
/// (per the name). Takes only a channel count, output range, flags and the out-parameter;
/// unlike the NWC Q8 variant in this header it has no input_stride/output_stride parameters.
Marat Dukhanefc47b82019-11-18 09:25:38 -0800783enum xnn_status xnn_create_global_average_pooling_ncw_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800784 size_t channels,
785 float output_min,
786 float output_max,
787 uint32_t flags,
788 xnn_operator_t* global_average_pooling_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -0700789
/// Prototype of the setup call for the F32 NCW global average pooling operator.
/// Takes the operator handle, batch size, a single spatial extent `width`, read-only `input`,
/// writable `output` and a pthreadpool_t; returns an enum xnn_status.
Marat Dukhanefc47b82019-11-18 09:25:38 -0800790enum xnn_status xnn_setup_global_average_pooling_ncw_f32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800791 xnn_operator_t global_average_pooling_op,
792 size_t batch_size,
793 size_t width,
794 const float* input,
795 float* output,
796 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -0700797
Marat Dukhanefc47b82019-11-18 09:25:38 -0800798#endif // XNN_NO_NCHW_OPERATORS
Marat Dukhan8fe54e42019-10-10 14:12:59 -0700799
Marat Dukhand6209722019-10-07 12:54:25 -0700800#endif // XNN_NO_F32_OPERATORS
801
802#ifndef XNN_NO_X32_OPERATORS
803
/// Prototype of the create call for the X32 channel-pad operator (NC layout, per the name).
/// Declared inside `#ifndef XNN_NO_X32_OPERATORS`. Takes the input channel count, the number
/// of channels to pad before and after, strides, and an untyped read-only `pad_value` pointer.
/// NOTE(review): presumably `pad_value` points to a single 32-bit element -- confirm.
804enum xnn_status xnn_create_channel_pad_nc_x32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800805 size_t input_channels,
806 size_t pad_before_channels,
807 size_t pad_after_channels,
808 size_t input_stride,
809 size_t output_stride,
810 const void* pad_value,
811 uint32_t flags,
812 xnn_operator_t* channel_pad_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -0700813
/// Prototype of the setup call for the X32 channel-pad operator.
/// Data pointers are untyped (`const void*` input, `void*` output); also takes the operator
/// handle, batch size and a pthreadpool_t. Returns an enum xnn_status.
814enum xnn_status xnn_setup_channel_pad_nc_x32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800815 xnn_operator_t channel_pad_op,
816 size_t batch_size,
817 const void* input,
818 void* output,
819 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -0700820
/// Prototype of the create call for the X32 channel-shuffle operator (NC layout, per the name).
/// Channel structure is given as `groups` and `group_channels`, plus input/output strides.
/// NOTE(review): presumably total channels = groups * group_channels -- confirm.
821enum xnn_status xnn_create_channel_shuffle_nc_x32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800822 size_t groups,
823 size_t group_channels,
824 size_t input_stride,
825 size_t output_stride,
826 uint32_t flags,
827 xnn_operator_t* channel_shuffle_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -0700828
/// Prototype of the setup call for the X32 channel-shuffle operator.
/// Takes the operator handle, batch size, untyped read-only `input`, untyped writable
/// `output` and a pthreadpool_t; returns an enum xnn_status.
829enum xnn_status xnn_setup_channel_shuffle_nc_x32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800830 xnn_operator_t channel_shuffle_op,
831 size_t batch_size,
832 const void* input,
833 void* output,
834 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -0700835
/// Prototype of the create call for the X32 2D unpooling operator (NHWC, per the name).
/// Takes four input paddings, the pooling window size, channel count and pixel strides.
/// Unlike the max-pooling create calls in this header, there are no stride or dilation
/// parameters.
836enum xnn_status xnn_create_unpooling2d_nhwc_x32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800837 uint32_t input_padding_top,
838 uint32_t input_padding_right,
839 uint32_t input_padding_bottom,
840 uint32_t input_padding_left,
841 uint32_t pooling_height,
842 uint32_t pooling_width,
843 size_t channels,
844 size_t input_pixel_stride,
845 size_t output_pixel_stride,
846 uint32_t flags,
847 xnn_operator_t* unpooling_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -0700848
/// Prototype of the setup call for the X32 2D unpooling operator.
/// In addition to the untyped `input`/`output` pointers it takes a read-only
/// `const uint32_t* index` array.
/// NOTE(review): presumably `index` holds positions recorded by an argmax pooling pass -- confirm.
849enum xnn_status xnn_setup_unpooling2d_nhwc_x32(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800850 xnn_operator_t unpooling_op,
851 size_t batch_size,
852 size_t input_height,
853 size_t input_width,
854 const void* input,
855 const uint32_t* index,
856 void* output,
857 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -0700858
859#endif // XNN_NO_X32_OPERATORS
860
861#ifndef XNN_NO_Q8_OPERATORS
862
/// Prototype of the create call for the Q8 add operator (NC layout, per the name).
/// Declared inside `#ifndef XNN_NO_Q8_OPERATORS`. Each of the operands `a`, `b` and the
/// result `sum` has its own stride and its own (uint8_t zero point, float scale) pair;
/// `sum_min` / `sum_max` are uint8_t bounds.
/// NOTE(review): the exact quantization formula is not visible in this header -- confirm.
863enum xnn_status xnn_create_add_nc_q8(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800864 size_t channels,
865 size_t a_stride,
866 size_t b_stride,
867 size_t sum_stride,
868 uint8_t a_zero_point,
869 float a_scale,
870 uint8_t b_zero_point,
871 float b_scale,
872 uint8_t sum_zero_point,
873 float sum_scale,
874 uint8_t sum_min,
875 uint8_t sum_max,
876 uint32_t flags,
877 xnn_operator_t* add_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -0700878
/// Prototype of the setup call for the Q8 add operator.
/// Takes the operator handle, batch size, two read-only uint8_t operands `a` and `b`,
/// the writable uint8_t result `sum` and a pthreadpool_t; returns an enum xnn_status.
879enum xnn_status xnn_setup_add_nc_q8(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800880 xnn_operator_t add_op,
881 size_t batch_size,
882 const uint8_t* a,
883 const uint8_t* b,
884 uint8_t* sum,
885 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -0700886
/// Prototype of the create call for the Q8 2D average pooling operator (NHWC, per the name).
/// Takes paddings, pooling window, strides (no dilation parameters), channel count, pixel
/// strides, input and output (zero point, scale) pairs and uint8_t output bounds.
/// NOTE(review): XNN_FLAG_TENSORFLOW_SAME_PADDING is documented at the top of this header;
/// whether it is accepted in `flags` here is not visible -- confirm.
887enum xnn_status xnn_create_average_pooling2d_nhwc_q8(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800888 uint32_t input_padding_top,
889 uint32_t input_padding_right,
890 uint32_t input_padding_bottom,
891 uint32_t input_padding_left,
892 uint32_t pooling_height,
893 uint32_t pooling_width,
894 uint32_t stride_height,
895 uint32_t stride_width,
896 size_t channels,
897 size_t input_pixel_stride,
898 size_t output_pixel_stride,
899 uint8_t input_zero_point,
900 float input_scale,
901 uint8_t output_zero_point,
902 float output_scale,
903 uint8_t output_min,
904 uint8_t output_max,
905 uint32_t flags,
906 xnn_operator_t* average_pooling_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -0700907
/// Prototype of the setup call for the Q8 2D average pooling operator.
/// Takes the operator handle, batch size, input height/width, read-only uint8_t `input`,
/// writable uint8_t `output` and a pthreadpool_t; returns an enum xnn_status.
908enum xnn_status xnn_setup_average_pooling2d_nhwc_q8(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800909 xnn_operator_t average_pooling_op,
910 size_t batch_size,
911 size_t input_height,
912 size_t input_width,
913 const uint8_t* input,
914 uint8_t* output,
915 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -0700916
/// Prototype of the create call for the Q8 2D convolution operator (NHWC, per the name).
/// Takes paddings, kernel size, subsampling, dilation, group configuration, pixel strides,
/// (zero point, scale) pairs for input, kernel and output, a read-only uint8_t `kernel`,
/// a read-only int32_t `bias`, uint8_t output bounds, flags and the out-parameter.
/// NOTE(review): the top of this header documents XNN_FLAG_DEPTHWISE_CONVOLUTION (HWGo filter
/// layout) and XNN_FLAG_TENSORFLOW_SAME_PADDING; presumably both may be set in `flags` -- confirm.
XNNPACK Teamb455b122019-09-27 18:10:33 -0700917enum xnn_status xnn_create_convolution2d_nhwc_q8(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800918 uint32_t input_padding_top,
919 uint32_t input_padding_right,
920 uint32_t input_padding_bottom,
921 uint32_t input_padding_left,
922 uint32_t kernel_height,
923 uint32_t kernel_width,
924 uint32_t subsampling_height,
925 uint32_t subsampling_width,
926 uint32_t dilation_height,
927 uint32_t dilation_width,
928 uint32_t groups,
929 size_t group_input_channels,
930 size_t group_output_channels,
931 size_t input_pixel_stride,
932 size_t output_pixel_stride,
933 uint8_t input_zero_point,
934 float input_scale,
935 uint8_t kernel_zero_point,
936 float kernel_scale,
937 const uint8_t* kernel,
938 const int32_t* bias,
939 uint8_t output_zero_point,
940 float output_scale,
941 uint8_t output_min,
942 uint8_t output_max,
943 uint32_t flags,
944 xnn_operator_t* convolution_op_out);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700945
/// Prototype of the setup call for the Q8 NHWC 2D convolution operator.
/// Takes the operator handle, batch size, input height/width, read-only uint8_t `input`,
/// writable uint8_t `output` and a pthreadpool_t; returns an enum xnn_status.
946enum xnn_status xnn_setup_convolution2d_nhwc_q8(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800947 xnn_operator_t convolution_op,
948 size_t batch_size,
949 size_t input_height,
950 size_t input_width,
951 const uint8_t* input,
952 uint8_t* output,
953 pthreadpool_t threadpool);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700954
/// Prototype of the create call for the Q8 2D deconvolution operator (NHWC, per the name).
/// Mirrors the Q8 convolution create signature with two naming differences: the paddings
/// are `output_padding_*` (not `input_padding_*`) and the step is `stride_*` (not
/// `subsampling_*`). Weights are a read-only uint8_t `kernel`, biases a read-only int32_t `bias`.
XNNPACK Teamb455b122019-09-27 18:10:33 -0700955enum xnn_status xnn_create_deconvolution2d_nhwc_q8(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800956 uint32_t output_padding_top,
957 uint32_t output_padding_right,
958 uint32_t output_padding_bottom,
959 uint32_t output_padding_left,
960 uint32_t kernel_height,
961 uint32_t kernel_width,
962 uint32_t stride_height,
963 uint32_t stride_width,
964 uint32_t dilation_height,
965 uint32_t dilation_width,
966 uint32_t groups,
967 size_t group_input_channels,
968 size_t group_output_channels,
969 size_t input_pixel_stride,
970 size_t output_pixel_stride,
971 uint8_t input_zero_point,
972 float input_scale,
973 uint8_t kernel_zero_point,
974 float kernel_scale,
975 const uint8_t* kernel,
976 const int32_t* bias,
977 uint8_t output_zero_point,
978 float output_scale,
979 uint8_t output_min,
980 uint8_t output_max,
981 uint32_t flags,
982 xnn_operator_t* deconvolution_op_out);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700983
/// Prototype of the setup call for the Q8 NHWC 2D deconvolution operator.
/// Beyond the usual batch size, input size and data pointers it takes two uint32_t
/// values, `adjustment_height` and `adjustment_width`.
/// NOTE(review): presumably these extend the computed output size -- confirm.
984enum xnn_status xnn_setup_deconvolution2d_nhwc_q8(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800985 xnn_operator_t deconvolution_op,
986 size_t batch_size,
987 size_t input_height,
988 size_t input_width,
989 uint32_t adjustment_height,
990 uint32_t adjustment_width,
991 const uint8_t* input,
992 uint8_t* output,
993 pthreadpool_t threadpool);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700994
/// Prototype of the create call for the Q8 fully connected operator (NC layout, per the name).
/// Takes input/output channel counts and strides, (zero point, scale) pairs for input,
/// kernel and output, a read-only uint8_t `kernel`, a read-only int32_t `bias`,
/// uint8_t output bounds, flags and the out-parameter.
/// NOTE(review): XNN_FLAG_TRANSPOSE_WEIGHTS is documented at the top of this header as
/// "assume transposed weights in a fully connected operator"; presumably valid here -- confirm.
XNNPACK Teamb455b122019-09-27 18:10:33 -0700995enum xnn_status xnn_create_fully_connected_nc_q8(
Marat Dukhan03bc4072020-01-28 14:52:25 -0800996 size_t input_channels,
997 size_t output_channels,
998 size_t input_stride,
999 size_t output_stride,
1000 uint8_t input_zero_point,
1001 float input_scale,
1002 uint8_t kernel_zero_point,
1003 float kernel_scale,
1004 const uint8_t* kernel,
1005 const int32_t* bias,
1006 uint8_t output_zero_point,
1007 float output_scale,
1008 uint8_t output_min,
1009 uint8_t output_max,
1010 uint32_t flags,
1011 xnn_operator_t* fully_connected_op_out);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001012
/// Prototype of the setup call for the Q8 fully connected operator.
/// Takes the operator handle, batch size, read-only uint8_t `input`, writable uint8_t
/// `output` and a pthreadpool_t; returns an enum xnn_status.
1013enum xnn_status xnn_setup_fully_connected_nc_q8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001014 xnn_operator_t fully_connected_op,
1015 size_t batch_size,
1016 const uint8_t* input,
1017 uint8_t* output,
1018 pthreadpool_t threadpool);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001019
/// Prototype of the create call for the Q8 global average pooling operator in NWC layout
/// (per the name). Takes channel count, input/output strides, input and output
/// (zero point, scale) pairs, uint8_t output bounds, flags and the out-parameter.
XNNPACK Teamb455b122019-09-27 18:10:33 -07001020enum xnn_status xnn_create_global_average_pooling_nwc_q8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001021 size_t channels,
1022 size_t input_stride,
1023 size_t output_stride,
1024 uint8_t input_zero_point,
1025 float input_scale,
1026 uint8_t output_zero_point,
1027 float output_scale,
1028 uint8_t output_min,
1029 uint8_t output_max,
1030 uint32_t flags,
1031 xnn_operator_t* global_average_pooling_op_out);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001032
/// Prototype of the setup call for the Q8 NWC global average pooling operator.
/// Takes the operator handle, batch size, a single spatial extent `width`, read-only
/// uint8_t `input`, writable uint8_t `output` and a pthreadpool_t.
1033enum xnn_status xnn_setup_global_average_pooling_nwc_q8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001034 xnn_operator_t global_average_pooling_op,
1035 size_t batch_size,
1036 size_t width,
1037 const uint8_t* input,
1038 uint8_t* output,
1039 pthreadpool_t threadpool);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001040
/// Prototype of the create call for the Q8 leaky ReLU operator (NC layout, per the name).
/// `negative_slope` is a single float scalar here (the F32 PReLU create call in this header
/// takes a float pointer instead). Also takes input/output (zero point, scale) pairs and
/// uint8_t output bounds.
Marat Dukhand6209722019-10-07 12:54:25 -07001041enum xnn_status xnn_create_leaky_relu_nc_q8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001042 size_t channels,
1043 size_t input_stride,
1044 size_t output_stride,
1045 float negative_slope,
1046 uint8_t input_zero_point,
1047 float input_scale,
1048 uint8_t output_zero_point,
1049 float output_scale,
1050 uint8_t output_min,
1051 uint8_t output_max,
1052 uint32_t flags,
1053 xnn_operator_t* leaky_relu_op_out);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001054
/// Prototype of the setup call for the Q8 leaky ReLU operator.
/// Takes the operator handle, batch size, read-only uint8_t `input`, writable uint8_t
/// `output` and a pthreadpool_t; returns an enum xnn_status.
Marat Dukhand6209722019-10-07 12:54:25 -07001055enum xnn_status xnn_setup_leaky_relu_nc_q8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001056 xnn_operator_t leaky_relu_op,
1057 size_t batch_size,
1058 const uint8_t* input,
1059 uint8_t* output,
1060 pthreadpool_t threadpool);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001061
/// Prototype of the create call for the Q8 sigmoid operator (NC layout, per the name).
/// Unlike the F32 sigmoid create call, it carries input/output (zero point, scale) pairs
/// and uint8_t `output_min` / `output_max` bounds.
/// NOTE(review): any restriction on the permitted output quantization is not visible here.
Marat Dukhand6209722019-10-07 12:54:25 -07001062enum xnn_status xnn_create_sigmoid_nc_q8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001063 size_t channels,
1064 size_t input_stride,
1065 size_t output_stride,
1066 uint8_t input_zero_point,
1067 float input_scale,
1068 uint8_t output_zero_point,
1069 float output_scale,
1070 uint8_t output_min,
1071 uint8_t output_max,
1072 uint32_t flags,
1073 xnn_operator_t* sigmoid_op_out);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001074
/// Prototype of the setup call for the Q8 sigmoid operator.
/// Takes the operator handle, batch size, read-only uint8_t `input`, writable uint8_t
/// `output` and a pthreadpool_t; returns an enum xnn_status.
Marat Dukhand6209722019-10-07 12:54:25 -07001075enum xnn_status xnn_setup_sigmoid_nc_q8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001076 xnn_operator_t sigmoid_op,
1077 size_t batch_size,
1078 const uint8_t* input,
1079 uint8_t* output,
1080 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -07001081
/// Prototype of the create call for the Q8 softmax operator (NC layout, per the name).
/// Differs from the other Q8 create calls in this section: it takes `input_scale` but no
/// `input_zero_point`, and has no `output_min` / `output_max` parameters.
/// NOTE(review): any constraint on output_zero_point/output_scale is not visible here -- confirm.
Marat Dukhanfd8e6892020-01-27 15:25:25 -08001082enum xnn_status xnn_create_softmax_nc_q8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001083 size_t channels,
1084 size_t input_stride,
1085 size_t output_stride,
1086 float input_scale,
1087 uint8_t output_zero_point,
1088 float output_scale,
1089 uint32_t flags,
1090 xnn_operator_t* softmax_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -07001091
/// Prototype of the setup call for the Q8 softmax operator.
/// Takes the operator handle, batch size, read-only uint8_t `input`, writable uint8_t
/// `output` and a pthreadpool_t; returns an enum xnn_status.
Marat Dukhanfd8e6892020-01-27 15:25:25 -08001092enum xnn_status xnn_setup_softmax_nc_q8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001093 xnn_operator_t softmax_op,
1094 size_t batch_size,
1095 const uint8_t* input,
1096 uint8_t* output,
1097 pthreadpool_t threadpool);
Marat Dukhand6209722019-10-07 12:54:25 -07001098
1099#endif // XNN_NO_Q8_OPERATORS
1100
1101#ifndef XNN_NO_U8_OPERATORS
1102
/// Prototype of the create call for the U8 clamp operator (NC layout, per the name).
/// Declared inside `#ifndef XNN_NO_U8_OPERATORS`. Takes channel count, strides, uint8_t
/// `output_min` / `output_max`, flags and the out-parameter; no zero point or scale is involved.
1103enum xnn_status xnn_create_clamp_nc_u8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001104 size_t channels,
1105 size_t input_stride,
1106 size_t output_stride,
1107 uint8_t output_min,
1108 uint8_t output_max,
1109 uint32_t flags,
1110 xnn_operator_t* clamp_op_out);
Marat Dukhand6209722019-10-07 12:54:25 -07001111
/// Prototype of the setup call for the U8 clamp operator.
/// Takes the operator handle, batch size, read-only uint8_t `input`, writable uint8_t
/// `output` and a pthreadpool_t; returns an enum xnn_status.
1112enum xnn_status xnn_setup_clamp_nc_u8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001113 xnn_operator_t clamp_op,
1114 size_t batch_size,
1115 const uint8_t* input,
1116 uint8_t* output,
1117 pthreadpool_t threadpool);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001118
/// Prototype of the create call for the U8 2D max-pooling operator (NHWC, per the name).
/// Takes paddings, pooling window, strides, dilation, channel count, pixel strides,
/// uint8_t output bounds, flags and the out-parameter.
/// NOTE(review): XNN_FLAG_TENSORFLOW_SAME_PADDING is documented at the top of this header;
/// whether it is accepted in `flags` here is not visible -- confirm.
1119enum xnn_status xnn_create_max_pooling2d_nhwc_u8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001120 uint32_t input_padding_top,
1121 uint32_t input_padding_right,
1122 uint32_t input_padding_bottom,
1123 uint32_t input_padding_left,
1124 uint32_t pooling_height,
1125 uint32_t pooling_width,
1126 uint32_t stride_height,
1127 uint32_t stride_width,
1128 uint32_t dilation_height,
1129 uint32_t dilation_width,
1130 size_t channels,
1131 size_t input_pixel_stride,
1132 size_t output_pixel_stride,
1133 uint8_t output_min,
1134 uint8_t output_max,
1135 uint32_t flags,
1136 xnn_operator_t* max_pooling_op_out);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001137
/// Prototype of the setup call for the U8 2D max-pooling operator.
/// Takes the operator handle, batch size, input height/width, read-only uint8_t `input`,
/// writable uint8_t `output` and a pthreadpool_t; returns an enum xnn_status.
1138enum xnn_status xnn_setup_max_pooling2d_nhwc_u8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001139 xnn_operator_t max_pooling_op,
1140 size_t batch_size,
1141 size_t input_height,
1142 size_t input_width,
1143 const uint8_t* input,
1144 uint8_t* output,
1145 pthreadpool_t threadpool);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001146
Marat Dukhand6209722019-10-07 12:54:25 -07001147#endif // XNN_NO_U8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001148
Marat Dukhand6209722019-10-07 12:54:25 -07001149#ifndef XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001150
/// Prototype of the create call for the X8 channel-shuffle operator (NC layout, per the name).
/// Declared inside `#ifndef XNN_NO_X8_OPERATORS`. Parameter list matches the X32 variant:
/// groups, group_channels, strides, flags and the out-parameter.
/// NOTE(review): presumably operates on 8-bit elements, per the `_x8` suffix -- confirm.
1151enum xnn_status xnn_create_channel_shuffle_nc_x8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001152 size_t groups,
1153 size_t group_channels,
1154 size_t input_stride,
1155 size_t output_stride,
1156 uint32_t flags,
1157 xnn_operator_t* channel_shuffle_op_out);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001158
/// Prototype of the setup call for the X8 channel-shuffle operator.
/// Takes the operator handle, batch size, untyped read-only `input`, untyped writable
/// `output` and a pthreadpool_t; returns an enum xnn_status.
1159enum xnn_status xnn_setup_channel_shuffle_nc_x8(
Marat Dukhan03bc4072020-01-28 14:52:25 -08001160 xnn_operator_t channel_shuffle_op,
1161 size_t batch_size,
1162 const void* input,
1163 void* output,
1164 pthreadpool_t threadpool);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001165
Marat Dukhand6209722019-10-07 12:54:25 -07001166#endif // XNN_NO_X8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -07001167
1168#ifdef __cplusplus
1169} // extern "C"
1170#endif