blob: 657c6c628b32c56bc9e763bc64cdda567c4bad69 [file] [log] [blame]
XNNPACK Teamb455b122019-09-27 18:10:33 -07001// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2019 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8
#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <xnnpack.h>
#include <xnnpack/allocator.h>
#include <xnnpack/operator.h>
#include <xnnpack/common.h>
#include <xnnpack/log.h>
#include <xnnpack/math.h>
#include <xnnpack/params-init.h>
#include <xnnpack/params.h>
#include <xnnpack/indirection.h>
26
27
// Number of pooling-window positions along one spatial axis: the first window
// plus one more for every full stride that still fits in the padded extent.
static inline size_t compute_output_dimension(
  size_t padded_input_dimension,
  size_t pooling_dimension,
  size_t stride_dimension)
{
  const size_t last_window_offset = padded_input_dimension - pooling_dimension;
  return last_window_offset / stride_dimension + 1;
}
35
// TensorFlow SAME padding: the output extent is ceil(input / stride),
// independent of the pooling window size.
static inline size_t compute_output_dimension_with_tf_same_padding(
  size_t input_dimension,
  size_t stride_dimension)
{
  const size_t ceil_quotient = divide_round_up(input_dimension, stride_dimension);
  return ceil_quotient;
}
42
XNNPACK Teamb455b122019-09-27 18:10:33 -070043enum xnn_status xnn_create_average_pooling2d_nhwc_q8(
44 uint32_t input_padding_top,
45 uint32_t input_padding_right,
46 uint32_t input_padding_bottom,
47 uint32_t input_padding_left,
48 uint32_t pooling_height,
49 uint32_t pooling_width,
50 uint32_t stride_height,
51 uint32_t stride_width,
52 size_t channels,
53 size_t input_pixel_stride,
54 size_t output_pixel_stride,
55 uint8_t input_zero_point,
56 float input_scale,
57 uint8_t output_zero_point,
58 float output_scale,
59 uint8_t output_min,
60 uint8_t output_max,
61 uint32_t flags,
62 xnn_operator_t* average_pooling_op_out)
63{
64 xnn_operator_t average_pooling_op = NULL;
65 enum xnn_status status = xnn_status_uninitialized;
66
67 if (!xnn_params.initialized) {
68 xnn_log_error("failed to create Average Pooling operator: XNNPACK is not initialized");
69 goto error;
70 }
71
72 status = xnn_status_invalid_parameter;
73
74 const uint32_t pooling_size = pooling_height * pooling_width;
75 if (pooling_size == 0) {
76 xnn_log_error(
77 "failed to create Average Pooling operator with %" PRIu32 "x%" PRIu32 " pooling size: "
78 "pooling size dimensions must be non-zero",
79 pooling_width, pooling_height);
80 goto error;
81 }
82
83 if (pooling_size == 1) {
84 xnn_log_error(
85 "failed to create Average Pooling operator with 1 pooling element: 1x1 pooling is meaningless");
86 goto error;
87 }
88
89 if (stride_height == 0 || stride_width == 0) {
90 xnn_log_error(
91 "failed to create Average Pooling operator with %" PRIu32 "x%" PRIu32 " stride: "
92 "stride dimensions must be non-zero",
93 stride_width, stride_height);
94 goto error;
95 }
96
97 if (channels == 0) {
98 xnn_log_error(
99 "failed to create Average Pooling operator with %zu channels: number of channels must be non-zero",
100 channels);
101 goto error;
102 }
103
104 if (input_pixel_stride < channels) {
105 xnn_log_error(
106 "failed to create Average Pooling operator with input pixel stride of %zu: "
107 "stride must be at least as large as the number of channels (%zu)",
108 input_pixel_stride, channels);
109 goto error;
110 }
111
112 if (output_pixel_stride < channels) {
113 xnn_log_error(
114 "failed to create Average Pooling operator with output pixel stride of %zu: "
115 "stride must be at least as large as the number of channels (%zu)",
116 output_pixel_stride, channels);
117 goto error;
118 }
119
120 if (input_scale <= 0.0f || !isnormal(input_scale)) {
121 xnn_log_error(
122 "failed to create Average Pooling operator with %.7g input scale: "
123 "scale must be finite, normalized, and positive",
124 input_scale);
125 goto error;
126 }
127
128 if (output_scale <= 0.0f || !isnormal(output_scale)) {
129 xnn_log_error(
130 "failed to create Average Pooling operator with %.7g output scale: "
131 "scale must be finite, normalized, and positive",
132 output_scale);
133 goto error;
134 }
135
136 if (output_min >= output_max) {
137 xnn_log_error(
138 "failed to create Average Pooling operator with [%" PRIu8 ", %" PRIu8 "] output range: "
139 "range min must be below range max",
140 output_min, output_max);
141 goto error;
142 }
143
Marat Dukhan466da752020-02-28 02:00:49 -0800144 const bool any_padding = (input_padding_left | input_padding_top | input_padding_right | input_padding_bottom) != 0;
145 if ((flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0) {
146 if (any_padding) {
147 xnn_log_error(
148 "failed to create Average Pooling operator with %" PRIu32 "+%" PRIu32 "x%" PRIu32 "+%" PRIu32" padding: "
149 "TensorFlow SAME padding can't be combined with explicit padding specification",
150 input_padding_top, input_padding_left, input_padding_bottom, input_padding_right);
151 goto error;
152 }
153 }
154
XNNPACK Teamb455b122019-09-27 18:10:33 -0700155 status = xnn_status_unsupported_parameter;
156
157 const float input_output_scale = input_scale / output_scale;
158 if (input_output_scale < 0x1.0p-8f || input_output_scale >= 0x1.0p+8f) {
159 xnn_log_error(
160 "failed to create Average Pooling operator with %.7g input scale and %.7g output scale: "
161 "input-to-output scale ratio (%.7f) must be in [2**-8, 2**8) range",
162 input_scale, output_scale, input_output_scale);
163 goto error;
164 }
165
166 if (pooling_size >= 16777216) {
167 xnn_log_error(
168 "failed to create Average Pooling operator with %"PRIu32" (%" PRIu32 "x%" PRIu32 ") pooling elements: "
169 "the number of elements in the pooling area must be below 2**24",
170 pooling_size, pooling_width, pooling_height);
171 goto error;
172 }
173
174 status = xnn_status_out_of_memory;
175
Marat Dukhan04f03be2019-11-19 12:36:47 -0800176 average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700177 if (average_pooling_op == NULL) {
178 xnn_log_error("failed to allocate %zu bytes for Average Pooling operator descriptor", sizeof(struct xnn_operator));
179 goto error;
180 }
181
Marat Dukhan5868d802020-03-19 17:18:45 -0700182 void* zero_buffer = xnn_allocate_simd_memory(channels * sizeof(uint8_t) + XNN_EXTRA_BYTES);
183 if (zero_buffer == NULL) {
184 xnn_log_error("failed to allocate %zu bytes for Average Pooling zero padding",
185 channels * sizeof(uint8_t) + XNN_EXTRA_BYTES);
186 goto error;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700187 }
Marat Dukhan5868d802020-03-19 17:18:45 -0700188 memset(zero_buffer, input_zero_point, channels * sizeof(uint8_t));
189 average_pooling_op->zero_buffer = zero_buffer;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700190
191 average_pooling_op->padding_top = input_padding_top;
192 average_pooling_op->padding_right = input_padding_right;
193 average_pooling_op->padding_bottom = input_padding_bottom;
194 average_pooling_op->padding_left = input_padding_left;
195
196 average_pooling_op->kernel_height = pooling_height;
197 average_pooling_op->kernel_width = pooling_width;
198 average_pooling_op->stride_height = stride_height;
199 average_pooling_op->stride_width = stride_width;
200 average_pooling_op->dilation_height = 1;
201 average_pooling_op->dilation_width = 1;
202 average_pooling_op->channels = channels;
203 average_pooling_op->input_pixel_stride = input_pixel_stride;
204 average_pooling_op->output_pixel_stride = output_pixel_stride;
205
Marat Dukhan1c8bc0c2020-03-20 00:21:58 -0700206 average_pooling_op->input_zero_point = input_zero_point;
207 average_pooling_op->output_zero_point = output_zero_point;
208 average_pooling_op->input_scale = input_scale;
209 average_pooling_op->output_scale = output_scale;
210 average_pooling_op->output_min = output_min;
211 average_pooling_op->output_max = output_max;
212
Marat Dukhan5868d802020-03-19 17:18:45 -0700213 // Number of rows read in the AVGPOOL micro-kernel.
214 const size_t avgpool_nrows =
215 round_up(doz(pooling_size, xnn_params.q8.avgpool.mr), xnn_params.q8.avgpool.qr) + xnn_params.q8.avgpool.mr;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700216 average_pooling_op->q8_avgpool_params =
Marat Dukhaneeaa7bd2019-10-25 17:31:25 -0700217 xnn_init_q8_avgpool_params(
Marat Dukhan5868d802020-03-19 17:18:45 -0700218 (int32_t) -((uint32_t) input_zero_point * (uint32_t) avgpool_nrows),
219 input_scale / (output_scale * (float) pooling_size),
220 output_zero_point, output_min, output_max);
221
Marat Dukhanefc47b82019-11-18 09:25:38 -0800222 average_pooling_op->type = xnn_operator_type_average_pooling_nhwc_q8;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700223 average_pooling_op->ukernel.type = xnn_ukernel_type_average_pooling;
Marat Dukhan466da752020-02-28 02:00:49 -0800224 average_pooling_op->flags = flags;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700225
226 *average_pooling_op_out = average_pooling_op;
227 return xnn_status_success;
228
229error:
230 xnn_delete_operator(average_pooling_op);
231 return status;
232}
233
234enum xnn_status xnn_create_average_pooling2d_nhwc_f32(
235 uint32_t input_padding_top,
236 uint32_t input_padding_right,
237 uint32_t input_padding_bottom,
238 uint32_t input_padding_left,
239 uint32_t pooling_height,
240 uint32_t pooling_width,
241 uint32_t stride_height,
242 uint32_t stride_width,
243 size_t channels,
244 size_t input_pixel_stride,
245 size_t output_pixel_stride,
246 float output_min,
247 float output_max,
248 uint32_t flags,
249 xnn_operator_t* average_pooling_op_out)
250{
251 xnn_operator_t average_pooling_op = NULL;
252 enum xnn_status status = xnn_status_uninitialized;
253
254 if (!xnn_params.initialized) {
255 xnn_log_error("failed to create Average Pooling operator: XNNPACK is not initialized");
256 goto error;
257 }
258
259 status = xnn_status_invalid_parameter;
260
261 const uint32_t pooling_size = pooling_height * pooling_width;
262 if (pooling_size == 0) {
263 xnn_log_error(
264 "failed to create Average Pooling operator with %" PRIu32 "x%" PRIu32 " pooling size: "
265 "pooling size dimensions must be non-zero",
266 pooling_width, pooling_height);
267 goto error;
268 }
269
270 if (pooling_size == 1) {
271 xnn_log_error(
272 "failed to create Average Pooling operator with 1 pooling element: 1x1 pooling is meaningless");
273 goto error;
274 }
275
276 if (stride_height == 0 || stride_width == 0) {
277 xnn_log_error(
278 "failed to create Average Pooling operator with %" PRIu32 "x%" PRIu32 " stride: "
279 "stride dimensions must be non-zero",
280 stride_width, stride_height);
281 goto error;
282 }
283
284 if (channels == 0) {
285 xnn_log_error(
286 "failed to create Average Pooling operator with %zu channels: number of channels must be non-zero",
287 channels);
288 goto error;
289 }
290
291 if (input_pixel_stride < channels) {
292 xnn_log_error(
293 "failed to create Average Pooling operator with input pixel stride of %zu: "
294 "stride must be at least as large as the number of channels (%zu)",
295 input_pixel_stride, channels);
296 goto error;
297 }
298
299 if (output_pixel_stride < channels) {
300 xnn_log_error(
301 "failed to create Average Pooling operator with output pixel stride of %zu: "
302 "stride must be at least as large as the number of channels (%zu)",
303 output_pixel_stride, channels);
304 goto error;
305 }
306
307 if (isnan(output_min)) {
308 xnn_log_error(
309 "failed to create Average Pooling operator with NaN output lower bound: lower bound must be non-NaN");
310 goto error;
311 }
312
313 if (isnan(output_max)) {
314 xnn_log_error(
315 "failed to create Average Pooling operator with NaN output upper bound: upper bound must be non-NaN");
316 goto error;
317 }
318
319 if (output_min >= output_max) {
320 xnn_log_error(
321 "failed to create Average Pooling operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
322 output_min, output_max);
323 goto error;
324 }
325
Marat Dukhan466da752020-02-28 02:00:49 -0800326 const bool any_padding = (input_padding_left | input_padding_top | input_padding_right | input_padding_bottom) != 0;
327 if ((flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0) {
328 if (any_padding) {
329 xnn_log_error(
330 "failed to create Average Pooling operator with %" PRIu32 "+%" PRIu32 "x%" PRIu32 "+%" PRIu32" padding: "
331 "TensorFlow SAME padding can't be combined with explicit padding specification",
332 input_padding_top, input_padding_left, input_padding_bottom, input_padding_right);
333 goto error;
334 }
335 }
336
XNNPACK Teamb455b122019-09-27 18:10:33 -0700337 status = xnn_status_out_of_memory;
338
Marat Dukhan04f03be2019-11-19 12:36:47 -0800339 average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700340 if (average_pooling_op == NULL) {
341 xnn_log_error("failed to allocate %zu bytes for Average Pooling operator descriptor", sizeof(struct xnn_operator));
342 goto error;
343 }
344
Marat Dukhan5868d802020-03-19 17:18:45 -0700345 void* zero_buffer = xnn_allocate_zero_simd_memory(channels * sizeof(float) + XNN_EXTRA_BYTES);
346 if (zero_buffer == NULL) {
347 xnn_log_error("failed to allocate %zu bytes for Average Pooling zero padding",
348 channels * sizeof(float) + XNN_EXTRA_BYTES);
349 goto error;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700350 }
Marat Dukhan5868d802020-03-19 17:18:45 -0700351 average_pooling_op->zero_buffer = zero_buffer;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700352
353 average_pooling_op->padding_top = input_padding_top;
354 average_pooling_op->padding_right = input_padding_right;
355 average_pooling_op->padding_bottom = input_padding_bottom;
356 average_pooling_op->padding_left = input_padding_left;
357
358 average_pooling_op->kernel_height = pooling_height;
359 average_pooling_op->kernel_width = pooling_width;
360 average_pooling_op->stride_height = stride_height;
361 average_pooling_op->stride_width = stride_width;
362 average_pooling_op->dilation_height = 1;
363 average_pooling_op->dilation_width = 1;
364 average_pooling_op->channels = channels;
365 average_pooling_op->input_pixel_stride = input_pixel_stride;
366 average_pooling_op->output_pixel_stride = output_pixel_stride;
367
Marat Dukhanefc47b82019-11-18 09:25:38 -0800368 average_pooling_op->type = xnn_operator_type_average_pooling_nhwc_f32;
Marat Dukhan8452ff52020-04-08 20:44:58 -0700369 average_pooling_op->f32_scaleminmax_params =
370 xnn_init_f32_scaleminmax_params(1.0f / (float) pooling_size, output_min, output_max);
Marat Dukhan5868d802020-03-19 17:18:45 -0700371 const bool tf_same_padding = (flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0;
372 if (any_padding || tf_same_padding) {
Marat Dukhaneb09a6b2020-04-08 17:34:32 -0700373 average_pooling_op->f32_minmax_params =
374 xnn_init_f32_minmax_params(output_min, output_max);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700375 average_pooling_op->ukernel.type = xnn_ukernel_type_pixelwise_average_pooling;
376 } else {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700377 average_pooling_op->ukernel.type = xnn_ukernel_type_average_pooling;
378 }
Marat Dukhan466da752020-02-28 02:00:49 -0800379 average_pooling_op->flags = flags;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700380
381 *average_pooling_op_out = average_pooling_op;
382 return xnn_status_success;
383
384error:
385 xnn_delete_operator(average_pooling_op);
386 return status;
387}
388
// Common setup logic shared by the Q8 and F32 average pooling operators.
//
// Computes output dimensions (deriving the padding from the input size when
// the operator was created with XNN_FLAG_TENSORFLOW_SAME_PADDING), then
// configures one of three compute paths:
//   - global average pooling (GAVGPOOL), when the padded input exactly matches
//     the pooling window;
//   - pixelwise average pooling (PAVGPOOL), when is_pixelwise is true, which
//     scales each output pixel by the reciprocal of its actual (non-padding)
//     coverage;
//   - regular average pooling (AVGPOOL) otherwise.
//
// params/params_size and global_params/global_params_size are opaque
// micro-kernel parameter blobs memcpy'd into the chosen compute context.
// log2_*_element_size convert element counts to byte strides.
//
// NOTE(review): num_threads is currently unused in this function — confirm
// whether thread-count-dependent tiling was intended here.
static enum xnn_status setup_average_pooling2d(
  xnn_operator_t average_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  uint32_t log2_input_element_size,
  uint32_t log2_output_element_size,
  struct avgpool_parameters avgpool[restrict XNN_MIN_ELEMENTS(1)],
  struct pavgpool_parameters pavgpool[restrict 1],
  struct gavgpool_parameters gavgpool[restrict XNN_MIN_ELEMENTS(1)],
  const void* params,
  size_t params_size,
  const void* global_params,
  size_t global_params_size,
  size_t num_threads,
  bool is_pixelwise)
{
  // pavgpool may legitimately be NULL (Q8 has no PAVGPOOL micro-kernel), but
  // only when the pixelwise path is not requested.
  assert(!is_pixelwise || pavgpool != NULL);

  // Mark the operator unusable until setup fully succeeds.
  average_pooling_op->state = xnn_run_state_invalid;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to setup Average Pooling operator: XNNPACK is not initialized");
    return xnn_status_uninitialized;
  }

  if (input_width == 0 || input_height == 0) {
    xnn_log_error(
      "failed to setup Average Pooling operator with %zux%zu input: input dimensions must be non-zero",
      input_width, input_height);
    return xnn_status_invalid_parameter;
  }

  // A zero batch is valid: record a no-op run and report success.
  if (batch_size == 0) {
    average_pooling_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  average_pooling_op->input_height = input_height;
  average_pooling_op->input_width = input_width;
  average_pooling_op->input = input;

  const bool tf_same_padding = (average_pooling_op->flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0;
  if (tf_same_padding) {
    // TF SAME: output = ceil(input / stride); total padding is back-derived
    // from the output size and split as evenly as possible, with the odd pixel
    // going to the bottom/right. The operator's padding fields are overwritten
    // on every setup with a new input size.
    average_pooling_op->output_height = compute_output_dimension_with_tf_same_padding(
      input_height, average_pooling_op->stride_height);
    average_pooling_op->output_width = compute_output_dimension_with_tf_same_padding(
      input_width, average_pooling_op->stride_width);

    const uint32_t kernel_height = average_pooling_op->kernel_height;
    const uint32_t kernel_width = average_pooling_op->kernel_width;
    const uint32_t total_padding_height =
      (average_pooling_op->output_height - 1) * average_pooling_op->stride_height + kernel_height - input_height;
    const uint32_t total_padding_width =
      (average_pooling_op->output_width - 1) * average_pooling_op->stride_width + kernel_width - input_width;
    average_pooling_op->padding_top = total_padding_height / 2;
    average_pooling_op->padding_left = total_padding_width / 2;
    average_pooling_op->padding_bottom = total_padding_height - average_pooling_op->padding_top;
    average_pooling_op->padding_right = total_padding_width - average_pooling_op->padding_left;
  } else {
    average_pooling_op->output_height = compute_output_dimension(
      average_pooling_op->padding_top + input_height + average_pooling_op->padding_bottom,
      average_pooling_op->kernel_height,
      average_pooling_op->stride_height);
    average_pooling_op->output_width = compute_output_dimension(
      average_pooling_op->padding_left + input_width + average_pooling_op->padding_right,
      average_pooling_op->kernel_width,
      average_pooling_op->stride_width);
  }
  average_pooling_op->output = output;

  const size_t output_height = average_pooling_op->output_height;
  const size_t output_width = average_pooling_op->output_width;
  const size_t padded_input_width = average_pooling_op->padding_left + input_width + average_pooling_op->padding_right;
  const size_t padded_input_height = average_pooling_op->padding_top + input_height + average_pooling_op->padding_bottom;
  if (padded_input_width == average_pooling_op->kernel_width && padded_input_height == average_pooling_op->kernel_height) {
    // Global average pooling: the single pooling window covers the whole
    // (padded) input, so the spatial dimensions collapse into one row of
    // input_height * input_width pixels processed by the GAVGPOOL kernels.
    const size_t input_elements = input_height * input_width;
    const size_t input_stride_in_bytes = average_pooling_op->input_pixel_stride << log2_input_element_size;
    const size_t channels = average_pooling_op->channels;
    average_pooling_op->context.global_average_pooling_nwc = (struct global_average_pooling_nwc_context) {
        .input = input,
        .zero = average_pooling_op->zero_buffer,
        .input_pixel_stride = input_stride_in_bytes,
        .input_batch_stride = input_stride_in_bytes * input_elements,
        .input_elements = input_elements,
        .channels = channels,
        .output = output,
        .output_batch_stride = average_pooling_op->output_pixel_stride << log2_output_element_size,
    };
    memcpy(&average_pooling_op->context.global_average_pooling_nwc.params, global_params, global_params_size);
    // One task per batch image.
    average_pooling_op->compute.type = xnn_parallelization_type_1d;
    average_pooling_op->compute.range[0] = batch_size;

    // Unipass handles up to mr rows in one sweep; larger inputs need the
    // multipass kernel with an intermediate accumulator.
    if (input_elements <= gavgpool->mr) {
      average_pooling_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_global_average_pooling_nwc_unipass;
      average_pooling_op->context.global_average_pooling_nwc.unipass_ukernel = gavgpool->up;
    } else {
      average_pooling_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_global_average_pooling_nwc_multipass;
      average_pooling_op->context.global_average_pooling_nwc.multipass_ukernel = gavgpool->mp;
    }
  } else {
    // Non-global average pooling: drive the AVGPOOL/PAVGPOOL kernels through
    // an indirection buffer of per-window input-row pointers.
    const size_t pooling_height = average_pooling_op->kernel_height;
    const size_t pooling_width = average_pooling_op->kernel_width;
    const size_t pooling_size = pooling_height * pooling_width;

    const uint32_t mr = is_pixelwise ? pavgpool->mr : avgpool->mr;

    const size_t step_width = min(average_pooling_op->stride_width, pooling_width);
    const size_t step_height = pooling_size + (output_width - 1) * step_width * pooling_height;

    // The indirection buffer is cached between setups; rebuild it only when
    // the input spatial size changed since the previous setup.
    const size_t last_input_height = average_pooling_op->last_input_height;
    const size_t last_input_width = average_pooling_op->last_input_width;
    if (input_height != last_input_height || input_width != last_input_width) {
      // Micro-kernel may read up to (mr - 1) elements after the end of indirection buffer.
      const size_t indirection_buffer_size = sizeof(void*) * ((mr - 1) + batch_size * output_height * step_height);

      const void** indirection_buffer = (const void**) xnn_reallocate_memory(average_pooling_op->indirection_buffer, indirection_buffer_size);
      if (indirection_buffer == NULL) {
        xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
        return xnn_status_out_of_memory;
      }
      average_pooling_op->indirection_buffer = indirection_buffer;

      // Indirection buffer always setup for batch size 1, larger batch size supported through input_offset argument
      average_pooling_op->batch_size = 1;
      xnn_indirection_init_dwconv2d(
        average_pooling_op, 0, step_height, step_width, log2_input_element_size);

      average_pooling_op->last_input = input;
      average_pooling_op->last_input_height = input_height;
      average_pooling_op->last_input_width = input_width;
    }

    const size_t channels = average_pooling_op->channels;

    const size_t indirect_input_height_stride = step_height * sizeof(void*);
    const size_t output_width_stride = average_pooling_op->output_pixel_stride << log2_output_element_size;
    const size_t output_height_stride = output_width * output_width_stride;

    if (is_pixelwise) {
      // This part is specific to FP32; needs revision if Q8 gets a PAVGPOOL micro-kernel.
      if (input_height != last_input_height || input_width != last_input_width) {
        // Pre-compute, per output pixel, the reciprocal of the number of
        // *valid* (non-padding) input pixels covered by its window, so edge
        // pixels are averaged over their true coverage.
        const size_t pixelwise_buffer_size = output_height * output_width * sizeof(float);
        float* pixelwise_buffer = (float*) xnn_reallocate_memory(average_pooling_op->pixelwise_buffer, pixelwise_buffer_size);
        if (pixelwise_buffer == NULL) {
          xnn_log_error("failed to allocate %zu bytes for pixelwise buffer", pixelwise_buffer_size);
          return xnn_status_out_of_memory;
        }
        average_pooling_op->pixelwise_buffer = pixelwise_buffer;

        float* pixelwise_pointer = pixelwise_buffer;
        for (size_t output_y = 0; output_y < output_height; output_y++) {
          // doz() clamps at zero; the window is clipped to [0, input_height).
          const size_t input_y_start = doz(output_y * average_pooling_op->stride_height, average_pooling_op->padding_top);
          const size_t input_y_end =
            min(doz(output_y * average_pooling_op->stride_height + average_pooling_op->kernel_height, average_pooling_op->padding_top), input_height);
          const uint32_t input_y_range = (uint32_t) (input_y_end - input_y_start);
          for (size_t output_x = 0; output_x < output_width; output_x++) {
            const size_t input_x_start = doz(output_x * average_pooling_op->stride_width, average_pooling_op->padding_left);
            const size_t input_x_end =
              min(doz(output_x * average_pooling_op->stride_width + average_pooling_op->kernel_width, average_pooling_op->padding_left), input_width);
            const uint32_t input_x_range = (uint32_t) (input_x_end - input_x_start);
            *pixelwise_pointer++ = 1.0f / ((float) (int32_t) (input_y_range * input_x_range));
          }
        }
      }

      // multipass_adjustment rewinds the indirection pointer after the extra
      // rows consumed by the multipass tiling (mr first pass, qr thereafter).
      const uint32_t qr = pavgpool->qr;
      const size_t multipass_adjustment =
        pooling_size > mr ? round_up(pooling_size - mr, qr) + mr - qr : 0;
      average_pooling_op->context.pixelwise_average_pooling = (struct pixelwise_average_pooling_context) {
          .indirect_input = average_pooling_op->indirection_buffer,
          .indirect_input_height_stride = indirect_input_height_stride,
          .input_batch_stride = input_height * input_width * average_pooling_op->input_pixel_stride << log2_input_element_size,
          // Byte offset of the current input from the pointer the cached
          // indirection buffer was built for, applied by the micro-kernel.
          .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) average_pooling_op->last_input),
          .pixelwise_buffer = average_pooling_op->pixelwise_buffer,
          .pixelwise_buffer_height_stride = output_width * sizeof(float),
          .output = output,
          .output_batch_stride = output_height * output_height_stride,
          .output_height_stride = output_height_stride,
          .output_width = output_width,
          .pooling_size = pooling_size,
          .channels = channels,
          .zero = average_pooling_op->zero_buffer,
          .input_increment = (pooling_height * step_width - multipass_adjustment) * sizeof(void*),
          .output_increment = output_width_stride - (channels << log2_output_element_size),
      };
      memcpy(&average_pooling_op->context.pixelwise_average_pooling.params, params, params_size);
      if (pooling_size <= mr) {
        average_pooling_op->context.pixelwise_average_pooling.unipass_ukernel = pavgpool->up;
        average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_pixelwise_average_pooling_unipass;
      } else {
        average_pooling_op->context.pixelwise_average_pooling.multipass_ukernel = pavgpool->mp;
        average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_pixelwise_average_pooling_multipass;
      }
    } else {
      const uint32_t qr = avgpool->qr;
      const size_t multipass_adjustment =
        pooling_size > mr ? round_up(pooling_size - mr, qr) + mr - qr : 0;
      average_pooling_op->context.average_pooling = (struct average_pooling_context) {
          .indirect_input = average_pooling_op->indirection_buffer,
          .indirect_input_height_stride = indirect_input_height_stride,
          .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) average_pooling_op->last_input),
          .input_batch_stride = input_height * input_width * average_pooling_op->input_pixel_stride << log2_input_element_size,
          .output = output,
          .output_batch_stride = output_height * output_height_stride,
          .output_height_stride = output_height_stride,
          .output_width = output_width,
          .pooling_size = pooling_size,
          .channels = channels,
          .zero = average_pooling_op->zero_buffer,
          .input_increment = (pooling_height * step_width - multipass_adjustment) * sizeof(void*),
          .output_increment = output_width_stride - (channels << log2_output_element_size),
          // NOTE(review): this initializer appears to be overwritten by the
          // memcpy of `params` just below — confirm it is redundant.
          .params.f32 = average_pooling_op->f32_scaleminmax_params,
      };
      memcpy(&average_pooling_op->context.average_pooling.params, params, params_size);
      if (pooling_size <= mr) {
        average_pooling_op->context.average_pooling.unipass_ukernel = avgpool->up;
        average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_average_pooling_unipass;
      } else {
        average_pooling_op->context.average_pooling.multipass_ukernel = avgpool->mp;
        average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_average_pooling_multipass;
      }
    }
    // Parallelize over (batch image, output row).
    average_pooling_op->compute.type = xnn_parallelization_type_2d;
    average_pooling_op->compute.range[0] = batch_size;
    average_pooling_op->compute.range[1] = output_height;
  }
  average_pooling_op->state = xnn_run_state_ready;

  return xnn_status_success;
}
624
Marat Dukhand6a54632020-03-11 06:22:45 -0700625enum xnn_status xnn_setup_average_pooling2d_nhwc_q8(
626 xnn_operator_t average_pooling_op,
627 size_t batch_size,
628 size_t input_height,
629 size_t input_width,
630 const uint8_t* input,
631 uint8_t* output,
632 pthreadpool_t threadpool)
633{
634 if (average_pooling_op->type != xnn_operator_type_average_pooling_nhwc_q8) {
635 xnn_log_error("failed to setup Average Pooling (Q8) operator: operator type mismatch");
636 return xnn_status_invalid_parameter;
637 }
638
639 assert(average_pooling_op->ukernel.type == xnn_ukernel_type_average_pooling);
640
Marat Dukhan1c8bc0c2020-03-20 00:21:58 -0700641 // Number of rows read in the GAVGPOOL micro-kernel.
642 const size_t input_size = input_height * input_width;
643 const size_t pooling_size = average_pooling_op->kernel_height * average_pooling_op->kernel_width;
644 const size_t gavgpool_nrows = round_up(input_size, xnn_params.q8.gavgpool.mr);
645 average_pooling_op->q8_gavgpool_params =
646 xnn_init_q8_avgpool_params(
647 (int32_t) -((uint32_t) average_pooling_op->input_zero_point * (uint32_t) gavgpool_nrows),
648 average_pooling_op->input_scale / (average_pooling_op->output_scale * (float) pooling_size),
649 average_pooling_op->output_zero_point,
650 average_pooling_op->output_min,
651 average_pooling_op->output_max);
652
Marat Dukhand6a54632020-03-11 06:22:45 -0700653 return setup_average_pooling2d(
654 average_pooling_op,
655 batch_size, input_height, input_width,
656 input, output,
657 0 /* log2(sizeof(input element)) = log2(sizeof(uint8_t)) */,
658 0 /* log2(sizeof(output element)) = log2(sizeof(uint8_t)) */,
659 &xnn_params.q8.avgpool,
Marat Dukhan5868d802020-03-19 17:18:45 -0700660 NULL /* no PAVGPOOL micro-kernel */,
661 &xnn_params.q8.gavgpool,
Marat Dukhand6a54632020-03-11 06:22:45 -0700662 &average_pooling_op->q8_avgpool_params,
663 sizeof(average_pooling_op->q8_avgpool_params),
Marat Dukhan5868d802020-03-19 17:18:45 -0700664 &average_pooling_op->q8_gavgpool_params,
665 sizeof(average_pooling_op->q8_gavgpool_params),
Marat Dukhand6a54632020-03-11 06:22:45 -0700666 pthreadpool_get_threads_count(threadpool),
667 false /* pixelwise not supported */);
668}
669
XNNPACK Teamb455b122019-09-27 18:10:33 -0700670enum xnn_status xnn_setup_average_pooling2d_nhwc_f32(
671 xnn_operator_t average_pooling_op,
672 size_t batch_size,
673 size_t input_height,
674 size_t input_width,
675 const float* input,
676 float* output,
677 pthreadpool_t threadpool)
678{
Marat Dukhanefc47b82019-11-18 09:25:38 -0800679 if (average_pooling_op->type != xnn_operator_type_average_pooling_nhwc_f32) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700680 xnn_log_error("failed to setup Average Pooling (F32) operator: operator type mismatch");
681 return xnn_status_invalid_parameter;
682 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700683
Marat Dukhand6a54632020-03-11 06:22:45 -0700684 assert(average_pooling_op->ukernel.type == xnn_ukernel_type_average_pooling ||
685 average_pooling_op->ukernel.type == xnn_ukernel_type_pixelwise_average_pooling);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700686
Marat Dukhan5868d802020-03-19 17:18:45 -0700687 const bool is_pixelwise = average_pooling_op->ukernel.type == xnn_ukernel_type_pixelwise_average_pooling;
Marat Dukhan1c8bc0c2020-03-20 00:21:58 -0700688 if (is_pixelwise) {
689 const size_t input_size = input_height * input_width;
Marat Dukhan8452ff52020-04-08 20:44:58 -0700690 xnn_update_f32_scaleminmax_params(&average_pooling_op->f32_scaleminmax_params, 1.0f / (float) input_size);
Marat Dukhan1c8bc0c2020-03-20 00:21:58 -0700691 }
Marat Dukhan5868d802020-03-19 17:18:45 -0700692
Marat Dukhand6a54632020-03-11 06:22:45 -0700693 return setup_average_pooling2d(
694 average_pooling_op,
695 batch_size, input_height, input_width,
696 input, output,
697 2 /* log2(sizeof(input element)) = log2(sizeof(float)) */,
698 2 /* log2(sizeof(output element)) = log2(sizeof(float)) */,
699 &xnn_params.f32.avgpool,
700 &xnn_params.f32.pavgpool,
Marat Dukhan5868d802020-03-19 17:18:45 -0700701 &xnn_params.f32.gavgpool,
Marat Dukhan8452ff52020-04-08 20:44:58 -0700702 is_pixelwise ? (const void*) &average_pooling_op->f32_minmax_params : (const void*) &average_pooling_op->f32_scaleminmax_params,
703 is_pixelwise ? sizeof(average_pooling_op->f32_minmax_params) : sizeof(average_pooling_op->f32_scaleminmax_params),
704 &average_pooling_op->f32_scaleminmax_params,
705 sizeof(average_pooling_op->f32_scaleminmax_params),
Marat Dukhand6a54632020-03-11 06:22:45 -0700706 pthreadpool_get_threads_count(threadpool),
Marat Dukhan5868d802020-03-19 17:18:45 -0700707 is_pixelwise);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700708}