// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

#include <xnnpack.h>
#include <xnnpack/allocator.h>
#include <xnnpack/operator.h>
#include <xnnpack/log.h>
#include <xnnpack/params-init.h>


enum xnn_status xnn_create_softmax_nc_q8(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    float input_scale,
    uint8_t output_zero_point,
    float output_scale,
    uint32_t flags,
    xnn_operator_t* softmax_op_out)
{
  xnn_operator_t softmax_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to create SoftMax operator: XNNPACK is not initialized");
    goto error;
  }

  status = xnn_status_invalid_parameter;

  if (channels == 0) {
    xnn_log_error(
      "failed to create SoftMax operator with %zu channels: number of channels must be non-zero", channels);
    goto error;
  }

  if (input_stride < channels) {
    xnn_log_error(
      "failed to create SoftMax operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      input_stride, channels);
    goto error;
  }

  if (output_stride < channels) {
    xnn_log_error(
      "failed to create SoftMax operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      output_stride, channels);
    goto error;
  }

  if (input_scale <= 0.0f || !isnormal(input_scale)) {
    xnn_log_error(
      "failed to create SoftMax operator with %.7g input scale: scale must be finite, normalized, and positive",
      input_scale);
    goto error;
  }

  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    xnn_log_error(
      "failed to create SoftMax operator with %.7g output scale: scale must be finite, normalized, and positive",
      output_scale);
    goto error;
  }

  status = xnn_status_unsupported_parameter;

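  // The Q8 softmax writes probabilities in [0, 1); a fixed output scale of 1/256 (2^-8) with zero
  // point 0 maps that range onto the full uint8 output, which is why no other output quantization
  // is accepted below.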
  if (output_scale != 0x1.0p-8f) {
    xnn_log_error(
      "failed to create SoftMax operator with %.7g output scale: only output scale of 1/256 is supported",
      output_scale);
    goto error;
  }

  if (output_zero_point != 0) {
    xnn_log_error(
      "failed to create SoftMax operator with %" PRIu8 " output zero point: "
      "only output zero point of 0 is supported",
      output_zero_point);
    goto error;
  }

  status = xnn_status_out_of_memory;

  softmax_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (softmax_op == NULL) {
    xnn_log_error("failed to allocate %zu bytes for SoftMax operator descriptor", sizeof(struct xnn_operator));
    goto error;
  }

  softmax_op->lookup_table = xnn_allocate_simd_memory(256 * sizeof(uint32_t));
  if (softmax_op->lookup_table == NULL) {
    xnn_log_error("failed to allocate %zu bytes for SoftMax lookup table", 256 * sizeof(uint32_t));
    goto error;
  }

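  // Fill a 256-entry table: entry i holds round(qscale * exp((i - 255) * input_scale)), i.e. the
  // exponential of the de-quantized gap between input value i and the largest representable input
  // (255). qscale is chosen so that a sum over `channels` entries cannot overflow uint32_t; the
  // 2^23 - 1 cap presumably protects precision in the lut32norm normalization that consumes this table.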
  uint32_t* lookup_table = softmax_op->lookup_table;
  const double qscale = fmin(((double) UINT32_MAX) / (double) channels, 8388607.0);
  for (int32_t i = 0; i < 256; i++) {
    const double scaled_exp_xi = qscale * exp((double) (i - 255) * (double) input_scale);
    lookup_table[(uint32_t) i] = (uint32_t) lrint(scaled_exp_xi);
  }

  softmax_op->channels = channels;
  softmax_op->input_pixel_stride = input_stride;
  softmax_op->output_pixel_stride = output_stride;

  softmax_op->type = xnn_operator_type_softmax_nc_q8;
  softmax_op->ukernel.type = xnn_ukernel_type_softmax;

  softmax_op->state = xnn_run_state_invalid;

  *softmax_op_out = softmax_op;
  return xnn_status_success;

error:
  xnn_delete_operator(softmax_op);
  return status;
}

enum xnn_status xnn_setup_softmax_nc_q8(
    xnn_operator_t softmax_op,
    size_t batch_size,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  if (softmax_op->type != xnn_operator_type_softmax_nc_q8) {
    xnn_log_error("failed to setup SoftMax (NC, Q8) operator: operator type mismatch");
    return xnn_status_invalid_parameter;
  }
  softmax_op->state = xnn_run_state_invalid;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to setup SoftMax operator: XNNPACK is not initialized");
    return xnn_status_uninitialized;
  }

  if (batch_size == 0) {
    softmax_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  softmax_op->batch_size = batch_size;
  softmax_op->input = input;
  softmax_op->output = output;

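  // Per batch element, the Q8 softmax runs two ukernels: rmax finds the row maximum, and lut32norm
  // uses the precomputed exp() lookup table to accumulate and normalize the outputs in fixed point.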
  softmax_op->context.u8_softmax = (struct u8_softmax_context) {
    .n = softmax_op->channels,
    .x = input,
    .x_stride = softmax_op->input_pixel_stride * sizeof(uint8_t),
    .t = softmax_op->lookup_table,
    .y = output,
    .y_stride = softmax_op->output_pixel_stride * sizeof(uint8_t),
    .rmax_ukernel = xnn_params.u8.rmax,
    .lut_norm_ukernel = xnn_params.u8.lut32norm,
  };
  softmax_op->compute.type = xnn_parallelization_type_1d;
  softmax_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_u8_softmax;
  softmax_op->compute.range[0] = batch_size;
  softmax_op->state = xnn_run_state_ready;

  return xnn_status_success;
}

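// Minimal usage sketch for the F32 operator (illustrative only, not part of this file; it assumes
// the public XNNPACK entry points xnn_initialize(), xnn_run_operator(), and xnn_delete_operator()
// at this revision, and a dense NC layout where both strides equal the channel count):
//
//   xnn_operator_t op = NULL;
//   xnn_initialize(NULL);  // argument is the optional custom allocator in this XNNPACK version
//   xnn_create_softmax_nc_f32(channels, channels, channels, 0, &op);
//   xnn_setup_softmax_nc_f32(op, batch_size, input, output, threadpool);
//   xnn_run_operator(op, threadpool);
//   xnn_delete_operator(op);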
enum xnn_status xnn_create_softmax_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* softmax_op_out)
{
  xnn_operator_t softmax_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to create SoftMax operator: XNNPACK is not initialized");
    goto error;
  }

  status = xnn_status_invalid_parameter;

  if (channels == 0) {
    xnn_log_error(
      "failed to create SoftMax operator with %zu channels: number of channels must be non-zero", channels);
    goto error;
  }

  if (input_stride < channels) {
    xnn_log_error(
      "failed to create SoftMax operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      input_stride, channels);
    goto error;
  }

  if (output_stride < channels) {
    xnn_log_error(
      "failed to create SoftMax operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      output_stride, channels);
    goto error;
  }

  status = xnn_status_out_of_memory;

  softmax_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (softmax_op == NULL) {
    xnn_log_error("failed to allocate %zu bytes for SoftMax operator descriptor", sizeof(struct xnn_operator));
    goto error;
  }

  softmax_op->channels = channels;
  softmax_op->input_pixel_stride = input_stride;
  softmax_op->output_pixel_stride = output_stride;

  softmax_op->type = xnn_operator_type_softmax_nc_f32;
  softmax_op->ukernel.type = xnn_ukernel_type_softmax;

  softmax_op->state = xnn_run_state_invalid;

  *softmax_op_out = softmax_op;
  return xnn_status_success;

error:
  xnn_delete_operator(softmax_op);
  return status;
}

enum xnn_status xnn_setup_softmax_nc_f32(
    xnn_operator_t softmax_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  if (softmax_op->type != xnn_operator_type_softmax_nc_f32) {
    xnn_log_error("failed to setup SoftMax (NC, F32) operator: operator type mismatch");
    return xnn_status_invalid_parameter;
  }
  softmax_op->state = xnn_run_state_invalid;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to setup SoftMax operator: XNNPACK is not initialized");
    return xnn_status_uninitialized;
  }

  if (batch_size == 0) {
    softmax_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  softmax_op->batch_size = batch_size;
  softmax_op->input = input;
  softmax_op->output = output;

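  // The F32 path is a three-pass softmax over each row: rmax computes the row maximum,
  // raddstoreexpminusmax stores exp(x - max) while accumulating its sum, and vmulc scales the stored
  // exponentials by the reciprocal of that sum. The (-INFINITY, INFINITY) output params leave the
  // result unclamped.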
  softmax_op->context.f32_three_pass_softmax = (struct f32_three_pass_softmax_context) {
    .n = softmax_op->channels * sizeof(float),
    .x = input,
    .x_stride = softmax_op->input_pixel_stride * sizeof(float),
    .y = output,
    .y_stride = softmax_op->output_pixel_stride * sizeof(float),
    .rmax_ukernel = xnn_params.f32.rmax,
    .raddstoreexpminusmax_ukernel = xnn_params.f32.raddstoreexpminusmax,
    .vmulc_ukernel = xnn_params.f32.vmul.opc_ukernel,
    .params = xnn_init_f32_output_params(-INFINITY, INFINITY),
  };
  softmax_op->compute.type = xnn_parallelization_type_1d;
  softmax_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_f32_three_pass_softmax;
  softmax_op->compute.range[0] = batch_size;
  softmax_op->state = xnn_run_state_ready;

  return xnn_status_success;
}