// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>
#include <inttypes.h>  // PRIu8, used in the output-range log message below
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

#include <xnnpack.h>
#include <xnnpack/allocator.h>
#include <xnnpack/operator.h>
#include <xnnpack/log.h>


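// Creates a Leaky ReLU operator for unsigned 8-bit quantized (QU8) data in
// channels-last (NC) layout. The activation is not evaluated at run time:
// the dequantize -> leaky-ReLU -> clamp -> requantize mapping is precomputed
// into a 256-entry lookup table, so execution is a per-byte table lookup.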
enum xnn_status xnn_create_leaky_relu_nc_qu8(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    float negative_slope,
    uint8_t input_zero_point,
    float input_scale,
    uint8_t output_zero_point,
    float output_scale,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    xnn_operator_t* leaky_relu_op_out)
{
  xnn_operator_t leaky_relu_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8));
    goto error;
  }

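  // Validate shape and quantization parameters; any failing check below
  // reports xnn_status_invalid_parameter.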
  status = xnn_status_invalid_parameter;

  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8), channels);
    goto error;
  }

  if (input_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8), input_stride, channels);
    goto error;
  }

  if (output_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8), output_stride, channels);
    goto error;
  }

  if (negative_slope <= 0.0f || !isnormal(negative_slope)) {
    xnn_log_error(
      "failed to create %s operator with %.7g negative slope: slope must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8), negative_slope);
    goto error;
  }

  if (negative_slope > 1.0f) {
    xnn_log_error(
      "failed to create %s operator with %.7g negative slope: slope must not exceed 1.0",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8), negative_slope);
    goto error;
  }

  if (input_scale <= 0.0f || !isnormal(input_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g input scale: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8), input_scale);
    goto error;
  }

  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8), output_scale);
    goto error;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%" PRIu8 ", %" PRIu8 "] output range: range min must be below range max",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8), output_min, output_max);
    goto error;
  }

  status = xnn_status_unsupported_parameter;

  const float input_output_scale = input_scale / output_scale;
  if (input_output_scale < 0x1.0p-8f || input_output_scale >= 0x1.0p+8f) {
    xnn_log_error(
      "failed to create %s operator with %.7g input-to-output scale ratio: "
      "scale ratio must be in [2**-8, 2**8) range",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8), input_output_scale);
    goto error;
  }

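  // Allocate the operator descriptor (zero-initialized) and its 256-byte
  // lookup table from SIMD-aligned memory.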
  status = xnn_status_out_of_memory;

  leaky_relu_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (leaky_relu_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8));
    goto error;
  }

  leaky_relu_op->lookup_table = xnn_allocate_simd_memory(256 * sizeof(uint8_t));
  if (leaky_relu_op->lookup_table == NULL) {
    xnn_log_error(
      "failed to allocate 256 bytes for %s operator lookup table",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8));
    goto error;
  }

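  // Build the lookup table: for each possible input byte, subtract the input
  // zero point and rescale into output units (input_scale / output_scale),
  // apply the leaky ReLU, clamp to the output range (expressed relative to the
  // output zero point), then round and re-add the output zero point.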
  uint8_t* lookup_table = leaky_relu_op->lookup_table;
  const float scaled_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
  const float scaled_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
  for (int32_t i = 0; i < 256; i++) {
    const float x = input_output_scale * (float) (i - (int32_t) (uint32_t) input_zero_point);
    float y = x < 0.0f ? x * negative_slope : x;
    if (y < scaled_min_less_zero_point) {
      y = scaled_min_less_zero_point;
    }
    if (y > scaled_max_less_zero_point) {
      y = scaled_max_less_zero_point;
    }
    lookup_table[(uint32_t) i] = (uint8_t) (lrintf(y) + (long) output_zero_point);
  }

  leaky_relu_op->channels = channels;
  leaky_relu_op->input_pixel_stride = input_stride;
  leaky_relu_op->output_pixel_stride = output_stride;

  leaky_relu_op->type = xnn_operator_type_leaky_relu_nc_qu8;
  leaky_relu_op->ukernel.type = xnn_ukernel_type_lut;

  leaky_relu_op->state = xnn_run_state_invalid;

  *leaky_relu_op_out = leaky_relu_op;
  return xnn_status_success;

error:
  xnn_delete_operator(leaky_relu_op);
  return status;
}

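// Typical usage sketch (a hypothetical example, assuming XNNPACK was
// initialized with xnn_initialize() and a pthreadpool was created; error
// handling omitted; the quantization parameters are illustrative only):
//
//   xnn_operator_t op = NULL;
//   xnn_create_leaky_relu_nc_qu8(
//       channels, channels, channels,  // dense layout: strides == channels
//       0.01f,                         // negative slope
//       128, 0x1.0p-8f,                // input zero point and scale
//       128, 0x1.0p-8f,                // output zero point and scale
//       0, 255,                        // output min / max
//       0 /* flags */, &op);
//   xnn_setup_leaky_relu_nc_qu8(op, batch_size, input, output, threadpool);
//   xnn_run_operator(op, threadpool);
//   xnn_delete_operator(op);
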
enum xnn_status xnn_setup_leaky_relu_nc_qu8(
    xnn_operator_t leaky_relu_op,
    size_t batch_size,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  if (leaky_relu_op->type != xnn_operator_type_leaky_relu_nc_qu8) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8),
      xnn_operator_type_to_string(leaky_relu_op->type));
    return xnn_status_invalid_parameter;
  }
  leaky_relu_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error(
      "failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8));
    return xnn_status_uninitialized;
  }

  if (batch_size == 0) {
    leaky_relu_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

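  // When both input and output rows are dense (stride == channels), or there
  // is only a single row, the whole batch is treated as one contiguous byte
  // array and split into fixed-size blocks; otherwise the table lookup runs
  // row by row.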
  const size_t channels = leaky_relu_op->channels;
  const size_t input_stride = leaky_relu_op->input_pixel_stride;
  const size_t output_stride = leaky_relu_op->output_pixel_stride;
  if ((((input_stride ^ channels) | (output_stride ^ channels)) == 0) || batch_size == 1) {
    const size_t block_size = 1024;
    leaky_relu_op->context.lut_contiguous = (struct lut_contiguous_context) {
      .x = input,
      .x_stride = input_stride * sizeof(uint8_t),
      .t = leaky_relu_op->lookup_table,
      .y = output,
      .y_stride = output_stride * sizeof(uint8_t),
      .ukernel = xnn_params.x8.lut,
    };
    leaky_relu_op->compute.type = xnn_parallelization_type_1d_tile_1d;
    leaky_relu_op->compute.task_1d_tile_1d = (pthreadpool_task_1d_tile_1d_t) xnn_compute_lut_contiguous;
    leaky_relu_op->compute.range[0] = batch_size * channels * sizeof(uint8_t);
    leaky_relu_op->compute.tile[0] = block_size;
  } else {
    leaky_relu_op->context.lut_strided = (struct lut_strided_context) {
      .n = channels,
      .x = input,
      .x_stride = input_stride * sizeof(uint8_t),
      .t = leaky_relu_op->lookup_table,
      .y = output,
      .y_stride = output_stride * sizeof(uint8_t),
      .ukernel = xnn_params.x8.lut,
    };
    leaky_relu_op->compute.type = xnn_parallelization_type_1d;
    leaky_relu_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_lut_strided;
    leaky_relu_op->compute.range[0] = batch_size;
    leaky_relu_op->compute.tile[0] = 0;
  }
  leaky_relu_op->state = xnn_run_state_ready;

  return xnn_status_success;
}